From c1f47b454ce759d7b13604137a233cad4617e1e8 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sun, 21 Jun 2009 13:58:51 +0200
Subject: perf_counter tools: Fix vmlinux fallback when running on a different
 kernel

Lucas De Marchi reported that perf report and perf annotate
displays mismatching profile if a perf.data is analyzed on
an older kernel - even if the correct vmlinux is specified
via the -k option.

The reason is the fallback path in util/symbol.c:dso__load_kernel():

int dso__load_kernel(struct dso *self, const char *vmlinux,
                     symbol_filter_t filter, int verbose)
{
        int err = -1;

        if (vmlinux)
                err = dso__load_vmlinux(self, vmlinux, filter, verbose);

        if (err)
                err = dso__load_kallsyms(self, filter, verbose);

        return err;
}

dso__load_vmlinux() returns negative on error, but on success it
returns the number of symbols loaded - which confuses the function
to load the kallsyms.

This is normally harmless, as reporting is usually performed on the
same kernel that is analyzed - but if there's a mismatch then we
load the wrong kallsyms and create a non-sensical symbol tree.

The fix is to only fall back to kallsyms on errors.

Reported-by: Lucas De Marchi <lucas.de.marchi@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 86e1437..01b62fa 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -629,7 +629,7 @@ int dso__load_kernel(struct dso *self, const char *vmlinux,
 	if (vmlinux)
 		err = dso__load_vmlinux(self, vmlinux, filter, verbose);
 
-	if (err)
+	if (err < 0)
 		err = dso__load_kallsyms(self, filter, verbose);
 
 	return err;
-- 
cgit v1.1


From 51e268423151fc7bb41945bde7843160b6a14c32 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Mon, 22 Jun 2009 16:43:14 +0530
Subject: perf_counter tools: Define separate declarations for H/W and S/W
 events

Define separate declarations for H/W and S/W events to:

 1. Shorten name to save some space so that we can add more members
 2. Fix alignment
 3. Avoid declaring HARDWARE/SOFTWARE again and again.

Removed unused CR(x, y)

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1245669194.17153.6.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/parse-events.c | 44 +++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 35d04da..12abab3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -18,30 +18,30 @@ struct event_symbol {
 	char	*symbol;
 };
 
-#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y
-#define CR(x, y) .type = PERF_TYPE_##x, .config = y
+#define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
+#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
 
 static struct event_symbol event_symbols[] = {
-  { C(HARDWARE, HW_CPU_CYCLES),		"cpu-cycles",		},
-  { C(HARDWARE, HW_CPU_CYCLES),		"cycles",		},
-  { C(HARDWARE, HW_INSTRUCTIONS),	"instructions",		},
-  { C(HARDWARE, HW_CACHE_REFERENCES),	"cache-references",	},
-  { C(HARDWARE, HW_CACHE_MISSES),	"cache-misses",		},
-  { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions",	},
-  { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches",		},
-  { C(HARDWARE, HW_BRANCH_MISSES),	"branch-misses",	},
-  { C(HARDWARE, HW_BUS_CYCLES),		"bus-cycles",		},
-
-  { C(SOFTWARE, SW_CPU_CLOCK),		"cpu-clock",		},
-  { C(SOFTWARE, SW_TASK_CLOCK),		"task-clock",		},
-  { C(SOFTWARE, SW_PAGE_FAULTS),	"page-faults",		},
-  { C(SOFTWARE, SW_PAGE_FAULTS),	"faults",		},
-  { C(SOFTWARE, SW_PAGE_FAULTS_MIN),	"minor-faults",		},
-  { C(SOFTWARE, SW_PAGE_FAULTS_MAJ),	"major-faults",		},
-  { C(SOFTWARE, SW_CONTEXT_SWITCHES),	"context-switches",	},
-  { C(SOFTWARE, SW_CONTEXT_SWITCHES),	"cs",			},
-  { C(SOFTWARE, SW_CPU_MIGRATIONS),	"cpu-migrations",	},
-  { C(SOFTWARE, SW_CPU_MIGRATIONS),	"migrations",		},
+  { CHW(CPU_CYCLES),		"cpu-cycles",		},
+  { CHW(CPU_CYCLES),		"cycles",		},
+  { CHW(INSTRUCTIONS),		"instructions",		},
+  { CHW(CACHE_REFERENCES),	"cache-references",	},
+  { CHW(CACHE_MISSES),		"cache-misses",		},
+  { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	},
+  { CHW(BRANCH_INSTRUCTIONS),	"branches",		},
+  { CHW(BRANCH_MISSES),		"branch-misses",	},
+  { CHW(BUS_CYCLES),		"bus-cycles",		},
+
+  { CSW(CPU_CLOCK),		"cpu-clock",		},
+  { CSW(TASK_CLOCK),		"task-clock",		},
+  { CSW(PAGE_FAULTS),		"page-faults",		},
+  { CSW(PAGE_FAULTS),		"faults",		},
+  { CSW(PAGE_FAULTS_MIN),	"minor-faults",		},
+  { CSW(PAGE_FAULTS_MAJ),	"major-faults",		},
+  { CSW(CONTEXT_SWITCHES),	"context-switches",	},
+  { CSW(CONTEXT_SWITCHES),	"cs",			},
+  { CSW(CPU_MIGRATIONS),	"cpu-migrations",	},
+  { CSW(CPU_MIGRATIONS),	"migrations",		},
 };
 
 #define __PERF_COUNTER_FIELD(config, name) \
-- 
cgit v1.1


From 74d5b5889ea71a95d8924c08f8a7c6e2bdcbc0ba Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Mon, 22 Jun 2009 16:44:28 +0530
Subject: perf_counter tools: Introduce alias member in event_symbol

By introducing alias member in event_symbol :

1. duplicate lines are removed, like:
   cpu-cycles and cycles
   branch-instructions and branches
   context-switches and cs
   cpu-migrations and migrations

2. We can also add alias for another events.

Now ./perf list looks like :

List of pre-defined events (to be used in -e):

  cpu-cycles OR cycles                     [Hardware event]
  instructions                             [Hardware event]
  cache-references                         [Hardware event]
  cache-misses                             [Hardware event]
  branch-instructions OR branches          [Hardware event]
  branch-misses                            [Hardware event]
  bus-cycles                               [Hardware event]

  cpu-clock                                [Software event]
  task-clock                               [Software event]
  page-faults                              [Software event]
  faults                                   [Software event]
  minor-faults                             [Software event]
  major-faults                             [Software event]
  context-switches OR cs                   [Software event]
  cpu-migrations OR migrations             [Software event]

  rNNN                                     [raw hardware event descriptor]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1245669268.17153.8.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/parse-events.c | 63 +++++++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 25 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 12abab3..f569548 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -16,32 +16,29 @@ struct event_symbol {
 	u8	type;
 	u64	config;
 	char	*symbol;
+	char	*alias;
 };
 
 #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
 
 static struct event_symbol event_symbols[] = {
-  { CHW(CPU_CYCLES),		"cpu-cycles",		},
-  { CHW(CPU_CYCLES),		"cycles",		},
-  { CHW(INSTRUCTIONS),		"instructions",		},
-  { CHW(CACHE_REFERENCES),	"cache-references",	},
-  { CHW(CACHE_MISSES),		"cache-misses",		},
-  { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	},
-  { CHW(BRANCH_INSTRUCTIONS),	"branches",		},
-  { CHW(BRANCH_MISSES),		"branch-misses",	},
-  { CHW(BUS_CYCLES),		"bus-cycles",		},
-
-  { CSW(CPU_CLOCK),		"cpu-clock",		},
-  { CSW(TASK_CLOCK),		"task-clock",		},
-  { CSW(PAGE_FAULTS),		"page-faults",		},
-  { CSW(PAGE_FAULTS),		"faults",		},
-  { CSW(PAGE_FAULTS_MIN),	"minor-faults",		},
-  { CSW(PAGE_FAULTS_MAJ),	"major-faults",		},
-  { CSW(CONTEXT_SWITCHES),	"context-switches",	},
-  { CSW(CONTEXT_SWITCHES),	"cs",			},
-  { CSW(CPU_MIGRATIONS),	"cpu-migrations",	},
-  { CSW(CPU_MIGRATIONS),	"migrations",		},
+  { CHW(CPU_CYCLES),		"cpu-cycles",		"cycles"	},
+  { CHW(INSTRUCTIONS),		"instructions",		""		},
+  { CHW(CACHE_REFERENCES),	"cache-references",	""		},
+  { CHW(CACHE_MISSES),		"cache-misses",		""		},
+  { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	"branches"	},
+  { CHW(BRANCH_MISSES),		"branch-misses",	""		},
+  { CHW(BUS_CYCLES),		"bus-cycles",		""		},
+
+  { CSW(CPU_CLOCK),		"cpu-clock",		""		},
+  { CSW(TASK_CLOCK),		"task-clock",		""		},
+  { CSW(PAGE_FAULTS),		"page-faults",		""		},
+  { CSW(PAGE_FAULTS),		"faults",		""		},
+  { CSW(PAGE_FAULTS_MIN),	"minor-faults",		""		},
+  { CSW(PAGE_FAULTS_MAJ),	"major-faults",		""		},
+  { CSW(CONTEXT_SWITCHES),	"context-switches",	"cs"		},
+  { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
 };
 
 #define __PERF_COUNTER_FIELD(config, name) \
@@ -196,6 +193,19 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a
 	return 0;
 }
 
+static int check_events(const char *str, unsigned int i)
+{
+	if (!strncmp(str, event_symbols[i].symbol,
+		     strlen(event_symbols[i].symbol)))
+		return 1;
+
+	if (strlen(event_symbols[i].alias))
+		if (!strncmp(str, event_symbols[i].alias,
+		      strlen(event_symbols[i].alias)))
+			return 1;
+	return 0;
+}
+
 /*
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
@@ -235,9 +245,7 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
-		if (!strncmp(str, event_symbols[i].symbol,
-			     strlen(event_symbols[i].symbol))) {
-
+		if (check_events(str, i)) {
 			attr->type = event_symbols[i].type;
 			attr->config = event_symbols[i].config;
 
@@ -289,6 +297,7 @@ void print_events(void)
 {
 	struct event_symbol *syms = event_symbols;
 	unsigned int i, type, prev_type = -1;
+	char name[40];
 
 	fprintf(stderr, "\n");
 	fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
@@ -301,14 +310,18 @@ void print_events(void)
 		if (type != prev_type)
 			fprintf(stderr, "\n");
 
-		fprintf(stderr, "  %-30s [%s]\n", syms->symbol,
+		if (strlen(syms->alias))
+			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
+		else
+			strcpy(name, syms->symbol);
+		fprintf(stderr, "  %-40s [%s]\n", name,
 			event_type_descriptors[type]);
 
 		prev_type = type;
 	}
 
 	fprintf(stderr, "\n");
-	fprintf(stderr, "  %-30s [raw hardware event descriptor]\n",
+	fprintf(stderr, "  %-40s [raw hardware event descriptor]\n",
 		"rNNN");
 	fprintf(stderr, "\n");
 
-- 
cgit v1.1


From 520f2c346af463fa00924b236e092da482b344cc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 22 Jun 2009 16:52:51 +0200
Subject: perf report: Output more symbol related debug data

Print more symbol relocation related info under -vv.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-report.c | 5 +++--
 tools/perf/util/symbol.c    | 4 ++++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5eb5566..ec230a0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -797,7 +797,7 @@ resolve_symbol(struct thread *thread, struct map **mapp,
 {
 	struct dso *dso = dsop ? *dsop : NULL;
 	struct map *map = mapp ? *mapp : NULL;
-	uint64_t ip = *ipp;
+	u64 ip = *ipp;
 
 	if (!thread)
 		return NULL;
@@ -814,7 +814,6 @@ resolve_symbol(struct thread *thread, struct map **mapp,
 			*mapp = map;
 got_map:
 		ip = map->map_ip(map, ip);
-		*ipp  = ip;
 
 		dso = map->dso;
 	} else {
@@ -828,6 +827,8 @@ got_map:
 		dso = kernel_dso;
 	}
 	dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>");
+	dprintf(" ...... map: %Lx -> %Lx\n", *ipp, ip);
+	*ipp  = ip;
 
 	if (dsop)
 		*dsop = dso;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 01b62fa..9c659ef 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -535,6 +535,10 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 		gelf_getshdr(sec, &shdr);
 		obj_start = sym.st_value;
 
+		if (verbose >= 2)
+			printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n",
+				(u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset);
+
 		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 
 		f = symbol__new(sym.st_value, sym.st_size,
-- 
cgit v1.1


From c0c22dbfa8ba3c5045eeb9c76d2822ffc44fefc3 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Mon, 22 Jun 2009 20:47:26 +0530
Subject: perf_counter tools: Set alias for page-faults

"faults" should be alias for "page-faults"

Also fixed alignment and 80 characters issue

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1245683846.12092.1.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/parse-events.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f569548..06af2fa 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -33,8 +33,7 @@ static struct event_symbol event_symbols[] = {
 
   { CSW(CPU_CLOCK),		"cpu-clock",		""		},
   { CSW(TASK_CLOCK),		"task-clock",		""		},
-  { CSW(PAGE_FAULTS),		"page-faults",		""		},
-  { CSW(PAGE_FAULTS),		"faults",		""		},
+  { CSW(PAGE_FAULTS),		"page-faults",		"faults"	},
   { CSW(PAGE_FAULTS_MIN),	"minor-faults",		""		},
   { CSW(PAGE_FAULTS_MAJ),	"major-faults",		""		},
   { CSW(CONTEXT_SWITCHES),	"context-switches",	"cs"		},
@@ -71,24 +70,24 @@ static char *sw_event_names[] = {
 
 #define MAX_ALIASES 8
 
-static char *hw_cache [][MAX_ALIASES] = {
-	{ "L1-data"		, "l1-d", "l1d"					},
-	{ "L1-instruction"	, "l1-i", "l1i"					},
-	{ "L2"			, "l2"						},
-	{ "Data-TLB"		, "dtlb", "d-tlb"				},
-	{ "Instruction-TLB"	, "itlb", "i-tlb"				},
-	{ "Branch"		, "bpu" , "btb", "bpc"				},
+static char *hw_cache[][MAX_ALIASES] = {
+	{ "L1-data",		"l1-d",		"l1d"			},
+	{ "L1-instruction",	"l1-i",		"l1i"			},
+	{ "L2",			"l2"					},
+	{ "Data-TLB",		"dtlb",		"d-tlb"			},
+	{ "Instruction-TLB",	"itlb",		"i-tlb"			},
+	{ "Branch",		"bpu" ,		"btb",		"bpc"	},
 };
 
-static char *hw_cache_op [][MAX_ALIASES] = {
-	{ "Load"		, "read"					},
-	{ "Store"		, "write"					},
-	{ "Prefetch"		, "speculative-read", "speculative-load"	},
+static char *hw_cache_op[][MAX_ALIASES] = {
+	{ "Load",		"read"					},
+	{ "Store",		"write"					},
+	{ "Prefetch",		"speculative-read", "speculative-load"	},
 };
 
-static char *hw_cache_result [][MAX_ALIASES] = {
-	{ "Reference"		, "ops", "access"				},
-	{ "Miss"								},
+static char *hw_cache_result[][MAX_ALIASES] = {
+	{ "Reference",		"ops",		"access"		},
+	{ "Miss"							},
 };
 
 char *event_name(int counter)
@@ -160,7 +159,8 @@ static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size)
 	return -1;
 }
 
-static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
+static int
+parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
 {
 	int cache_type = -1, cache_op = 0, cache_result = 0;
 
@@ -201,7 +201,7 @@ static int check_events(const char *str, unsigned int i)
 
 	if (strlen(event_symbols[i].alias))
 		if (!strncmp(str, event_symbols[i].alias,
-		      strlen(event_symbols[i].alias)))
+			     strlen(event_symbols[i].alias)))
 			return 1;
 	return 0;
 }
-- 
cgit v1.1


From dee412066aeb16c43cf31599948c1a1de385df56 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Tue, 23 Jun 2009 02:22:39 +0530
Subject: perf stat: Fix command option / manpage

-l is not supported, it should be -S for scale.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1245703959.6167.16.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-stat.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index c368a72..0d74346 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics
 SYNOPSIS
 --------
 [verse]
-'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
-'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>]
 
 DESCRIPTION
 -----------
@@ -40,7 +40,7 @@ OPTIONS
 -a::
         system-wide collection
 
--l::
+-S::
         scale counter values
 
 EXAMPLES
-- 
cgit v1.1


From 3d906ef10a539ff336010afab8f6f9c4fe379695 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 23 Jun 2009 11:23:07 +0200
Subject: perf_counter tools: Handle overlapping MMAP events

Martin Schwidefsky reported "perf report" symbol resolution
problems on S390.

Since we only report MMAP, not MUNMAP, we have to deal with
overlapping maps.

We used to simply throw out the old map on the assumption whole
maps got unmapped. This obviously doesn't deal with partial
unmaps. However it appears some dynamic linkers do fancy
partial unmaps (s390), so do something more elaborate and
truncate the old maps, only removing them when they've been
fully covered.

This resolves (part of) the S390 symbol resolution problems.

Reported-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Tested-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-report.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ec230a0..b4e76f7 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -400,9 +400,27 @@ static void thread__insert_map(struct thread *self, struct map *map)
 
 	list_for_each_entry_safe(pos, tmp, &self->maps, node) {
 		if (map__overlap(pos, map)) {
-			list_del_init(&pos->node);
-			/* XXX leaks dsos */
-			free(pos);
+			if (verbose >= 2) {
+				printf("overlapping maps:\n");
+				map__fprintf(map, stdout);
+				map__fprintf(pos, stdout);
+			}
+
+			if (map->start <= pos->start && map->end > pos->start)
+				pos->start = map->end;
+
+			if (map->end >= pos->end && map->start < pos->end)
+				pos->end = map->start;
+
+			if (verbose >= 2) {
+				printf("after collision:\n");
+				map__fprintf(pos, stdout);
+			}
+
+			if (pos->start >= pos->end) {
+				list_del_init(&pos->node);
+				free(pos);
+			}
 		}
 	}
 
-- 
cgit v1.1


From b0a28589b2fc9bee8ed83dee006a497d1ce93841 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 23 Jun 2009 16:39:53 +0200
Subject: perf report: Fix help text typo

Reported-by: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-report.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 52d3fc6..40c1db8 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -13,7 +13,7 @@ SYNOPSIS
 DESCRIPTION
 -----------
 This command displays the performance counter profile information recorded
-via perf report.
+via perf record.
 
 OPTIONS
 -------
-- 
cgit v1.1


From cca03c0aeb18a975abec28df518a2b64ae3e6964 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Tue, 23 Jun 2009 17:12:49 +0530
Subject: perf stat: Fix verbose for perf stat

Error message should use stderr for verbose (-v), otherwise
message will be lost for:

 $ ./perf stat -v <cmd>  > /dev/null

For example on AMD bus-cycles event is not available so now
it looks like:

 $ ./perf stat -v -e bus-cycles ls > /dev/null
Error: counter 0, sys_perf_counter_open() syscall returned with -1 (Invalid argument)

 Performance counter stats for 'ls':

  <not counted>  bus-cycles

    0.006765877  seconds time elapsed.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1245757369.3776.1.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6d3eeac..5e04fcc 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -109,6 +109,10 @@ static u64			walltime_nsecs_noise;
 static u64			runtime_cycles_avg;
 static u64			runtime_cycles_noise;
 
+
+#define ERR_PERF_OPEN \
+"Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
+
 static void create_perf_stat_counter(int counter)
 {
 	struct perf_counter_attr *attr = attrs + counter;
@@ -119,20 +123,20 @@ static void create_perf_stat_counter(int counter)
 
 	if (system_wide) {
 		int cpu;
-		for (cpu = 0; cpu < nr_cpus; cpu ++) {
+		for (cpu = 0; cpu < nr_cpus; cpu++) {
 			fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0);
-			if (fd[cpu][counter] < 0 && verbose) {
-				printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno));
-			}
+			if (fd[cpu][counter] < 0 && verbose)
+				fprintf(stderr, ERR_PERF_OPEN, counter,
+					fd[cpu][counter], strerror(errno));
 		}
 	} else {
 		attr->inherit	= inherit;
 		attr->disabled	= 1;
 
 		fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
-		if (fd[0][counter] < 0 && verbose) {
-			printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno));
-		}
+		if (fd[0][counter] < 0 && verbose)
+			fprintf(stderr, ERR_PERF_OPEN, counter,
+				fd[0][counter], strerror(errno));
 	}
 }
 
@@ -168,7 +172,7 @@ static void read_counter(int counter)
 	count[0] = count[1] = count[2] = 0;
 
 	nv = scale ? 3 : 1;
-	for (cpu = 0; cpu < nr_cpus; cpu ++) {
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
 		if (fd[cpu][counter] < 0)
 			continue;
 
-- 
cgit v1.1


From f7679dabfaf69840b000d238a020cee7157aca17 Mon Sep 17 00:00:00 2001
From: Roel Kluin <roel.kluin@gmail.com>
Date: Mon, 22 Jun 2009 18:42:33 +0200
Subject: perf_counter tools: Fix strbuf_fread() error path handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

size_t res cannot be less than 0 - fread returns 0 on error.

[ Updated by: René Scharfe <rene.scharfe@lsrfire.ath.cx> ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Junio C Hamano <gitster@pobox.com>
LKML-Reference: <4A3FB479.2090902@lsrfire.ath.cx>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/strbuf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index eaba093..464e7ca 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -259,7 +259,7 @@ size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
 	res = fread(sb->buf + sb->len, 1, size, f);
 	if (res > 0)
 		strbuf_setlen(sb, sb->len + res);
-	else if (res < 0 && oldalloc == 0)
+	else if (oldalloc == 0)
 		strbuf_release(sb);
 	return res;
 }
-- 
cgit v1.1


From 3d63259583278262d9153316094e315f73ebfcb5 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Wed, 24 Jun 2009 18:19:34 +0530
Subject: perf stat: Remove dead code

Remove dead code and do some code alignment.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1245847774.2681.2.camel@ht.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 44 +++++++++++++-------------------------------
 1 file changed, 13 insertions(+), 31 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5e04fcc..8420ec5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -59,42 +59,27 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
 
 };
 
+#define MAX_RUN			100
+
 static int			system_wide			=  0;
-static int			inherit				=  1;
 static int			verbose				=  0;
-
-static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
-
-static int			target_pid			= -1;
 static int			nr_cpus				=  0;
-static unsigned int		page_size;
+static int			run_idx				=  0;
 
+static int			run_count			=  1;
+static int			inherit				=  1;
 static int			scale				=  1;
+static int			target_pid			= -1;
 
-static const unsigned int default_count[] = {
-	1000000,
-	1000000,
-	  10000,
-	  10000,
-	1000000,
-	  10000,
-};
-
-#define MAX_RUN 100
-
-static int			run_count		=  1;
-static int			run_idx			=  0;
-
-static u64			event_res[MAX_RUN][MAX_COUNTERS][3];
-static u64			event_scaled[MAX_RUN][MAX_COUNTERS];
-
-//static u64			event_hist[MAX_RUN][MAX_COUNTERS][3];
-
+static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
 static u64			runtime_nsecs[MAX_RUN];
 static u64			walltime_nsecs[MAX_RUN];
 static u64			runtime_cycles[MAX_RUN];
 
+static u64			event_res[MAX_RUN][MAX_COUNTERS][3];
+static u64			event_scaled[MAX_RUN][MAX_COUNTERS];
+
 static u64			event_res_avg[MAX_COUNTERS][3];
 static u64			event_res_noise[MAX_COUNTERS][3];
 
@@ -109,7 +94,6 @@ static u64			walltime_nsecs_noise;
 static u64			runtime_cycles_avg;
 static u64			runtime_cycles_noise;
 
-
 #define ERR_PERF_OPEN \
 "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
 
@@ -470,9 +454,9 @@ static const struct option options[] = {
 	OPT_INTEGER('p', "pid", &target_pid,
 		    "stat events on existing pid"),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
-			    "system-wide collection from all CPUs"),
+		    "system-wide collection from all CPUs"),
 	OPT_BOOLEAN('S', "scale", &scale,
-			    "scale/normalize counters"),
+		    "scale/normalize counters"),
 	OPT_BOOLEAN('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_INTEGER('r', "repeat", &run_count,
@@ -484,8 +468,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
 {
 	int status;
 
-	page_size = sysconf(_SC_PAGE_SIZE);
-
 	memcpy(attrs, default_attrs, sizeof(attrs));
 
 	argc = parse_options(argc, argv, options, stat_usage, 0);
@@ -515,7 +497,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
 	status = 0;
 	for (run_idx = 0; run_idx < run_count; run_idx++) {
 		if (run_count != 1 && verbose)
-			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1);
+			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
 		status = run_perf_stat(argc, argv);
 	}
 
-- 
cgit v1.1


From 1b173f77dd0d5fd4f0ff18034aaa79e30da068b9 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 24 Jun 2009 19:54:29 +0200
Subject: perf_counter tools: Add CREDITS file for Git contributors

Much of perf's libraries comes from the Git project. I noticed
that the files (in tools/perf/util/*.[ch] and elsewhere) are
quite spartan wrt. credits, so lets add a CREDITS file that
includes an (incomplete!) list of main contributors.

Thanks guys, these libraries are really useful. Special thanks
go to Johannes Schindelin and Junio C Hamano for coming up with
this list.

List-Composed-By: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/CREDITS | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 tools/perf/CREDITS

(limited to 'tools')

diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS
new file mode 100644
index 0000000..c2ddcb3
--- /dev/null
+++ b/tools/perf/CREDITS
@@ -0,0 +1,30 @@
+Most of the infrastructure that 'perf' uses here has been reused
+from the Git project, as of version:
+
+    66996ec: Sync with 1.6.2.4
+
+Here is an (incomplete!) list of main contributors to those files
+in util/* and elsewhere:
+
+ Alex Riesen
+ Christian Couder
+ Dmitry Potapov
+ Jeff King
+ Johannes Schindelin
+ Johannes Sixt
+ Junio C Hamano
+ Linus Torvalds
+ Matthias Kestenholz
+ Michal Ostrowski
+ Miklos Vajna
+ Petr Baudis
+ Pierre Habouzit
+ René Scharfe
+ Samuel Tardieu
+ Shawn O. Pearce
+ Steffen Prohaska
+ Steve Haslam
+
+Thanks guys!
+
+The full history of the files can be found in the upstream Git commits.
-- 
cgit v1.1


From 76c64c5e4c47b6d28deb3cae8dfa07a93c2229dc Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 24 Jun 2009 21:08:36 +0200
Subject: perf record: Fix filemap pathname parsing in /proc/pid/maps

Looking backward for the first space from the end of a line in
/proc/pid/maps does not find the start of the pathname of the mapped
file if it contains a space.

Since the only slashes we have in this file occur in the (absolute!)
pathname column of file mappings, looking for the first slash in a
line is a safe method to find the name.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Stefani Seibold <stefani@seibold.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090624190835.GA25548@cmpxchg.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d7ebbd7..9b899ba 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -306,12 +306,11 @@ static void pid_synthesize_mmap_samples(pid_t pid)
 			continue;
 		pbf += n + 3;
 		if (*pbf == 'x') { /* vm_exec */
-			char *execname = strrchr(bf, ' ');
+			char *execname = strchr(bf, '/');
 
-			if (execname == NULL || execname[1] != '/')
+			if (execname == NULL)
 				continue;
 
-			execname += 1;
 			size = strlen(execname);
 			execname[size - 1] = '\0'; /* Remove \n */
 			memcpy(mmap_ev.filename, execname, size);
-- 
cgit v1.1


From 06813f6c743420c16f9248ab59bd2e68a2de57ba Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Thu, 25 Jun 2009 17:16:07 +0530
Subject: perf_counter tools: Check for valid cache operations

Made new table for cache operartion stat 'hw_cache_stat' as:

 L1I : Read and prefetch only
 ITLB and BPU : Read-only

introduce is_cache_op_valid() for cache operation validity

And checks for valid cache operations.

Reported-by : Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1245930367.5308.33.camel@localhost.localdomain>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/parse-events.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 06af2fa..7939a21 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -90,6 +90,34 @@ static char *hw_cache_result[][MAX_ALIASES] = {
 	{ "Miss"							},
 };
 
+#define C(x)		PERF_COUNT_HW_CACHE_##x
+#define CACHE_READ	(1 << C(OP_READ))
+#define CACHE_WRITE	(1 << C(OP_WRITE))
+#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
+#define COP(x)		(1 << x)
+
+/*
+ * cache operartion stat
+ * L1I : Read and prefetch only
+ * ITLB and BPU : Read-only
+ */
+static unsigned long hw_cache_stat[C(MAX)] = {
+ [C(L1D)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(L1I)]	= (CACHE_READ | CACHE_PREFETCH),
+ [C(LL)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(DTLB)]	= (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(ITLB)]	= (CACHE_READ),
+ [C(BPU)]	= (CACHE_READ),
+};
+
+static int is_cache_op_valid(u8 cache_type, u8 cache_op)
+{
+	if (hw_cache_stat[cache_type] & COP(cache_op))
+		return 1;	/* valid */
+	else
+		return 0;	/* invalid */
+}
+
 char *event_name(int counter)
 {
 	u64 config = attrs[counter].config;
@@ -123,6 +151,8 @@ char *event_name(int counter)
 		if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX)
 			return "unknown-ext-hardware-cache-result";
 
+		if (!is_cache_op_valid(cache_type, cache_op))
+			return "invalid-cache";
 		sprintf(name, "%s-Cache-%s-%ses",
 			hw_cache[cache_type][0],
 			hw_cache_op[cache_op][0],
@@ -179,6 +209,9 @@ parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr)
 	if (cache_op == -1)
 		cache_op = PERF_COUNT_HW_CACHE_OP_READ;
 
+	if (!is_cache_op_valid(cache_type, cache_op))
+		return -EINVAL;
+
 	cache_result = parse_aliases(str, hw_cache_result,
 					PERF_COUNT_HW_CACHE_RESULT_MAX);
 	/*
-- 
cgit v1.1


From e5c59547791f171b280bc4c4b2c3ff171824c1a3 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Thu, 25 Jun 2009 18:25:22 +0530
Subject: perf_counter tools: Shorten names for events

Added new alias for events.

On AMD box:

 $ ./perf stat -e l1d -e l1d-misses -e l1d-write -e l1d-prefetch -e l1d-prefetch-miss -e l1i -e l1i-misses -e l1i-prefetch -e l2 -e l2-misses -e l2-write -e dtlb -e dtlb-misses -e itlb -e itlb-misses -e bpu -e bpu-misses -- ls -lR /usr/include/ > /dev/null

Before :

 Performance counter stats for 'ls -lR /usr/include/':

      248064467  L1-data-Cache-Load-Referencees  (scaled from 23.27%)
        1001433  L1-data-Cache-Load-Misses  (scaled from 23.34%)
         153691  L1-data-Cache-Store-Referencees  (scaled from 23.34%)
         423248  L1-data-Cache-Prefetch-Referencees  (scaled from 23.33%)
         302138  L1-data-Cache-Prefetch-Misses  (scaled from 23.25%)
      251217546  L1-instruction-Cache-Load-Referencees  (scaled from 23.25%)
        5757005  L1-instruction-Cache-Load-Misses  (scaled from 23.23%)
          93435  L1-instruction-Cache-Prefetch-Referencees  (scaled from 23.24%)
        6496073  L2-Cache-Load-Referencees  (scaled from 23.32%)
         609485  L2-Cache-Load-Misses  (scaled from 23.45%)
        6876991  L2-Cache-Store-Referencees  (scaled from 23.71%)
      248922840  Data-TLB-Cache-Load-Referencees  (scaled from 23.94%)
        5828386  Data-TLB-Cache-Load-Misses  (scaled from 24.17%)
      257613506  Instruction-TLB-Cache-Load-Referencees  (scaled from 24.20%)
           6833  Instruction-TLB-Cache-Load-Misses  (scaled from 23.88%)
      109043606  Branch-Cache-Load-Referencees  (scaled from 23.64%)
        5552296  Branch-Cache-Load-Misses  (scaled from 23.42%)

    0.413702461  seconds time elapsed.

After :

 Peformance counter stats for 'ls -lR /usr/include/':

      266590464  L1-d$-loads           (scaled from 23.03%)
        1222273  L1-d$-load-misses     (scaled from 23.58%)
         146204  L1-d$-stores          (scaled from 23.83%)
         406344  L1-d$-prefetches      (scaled from 24.09%)
         283748  L1-d$-prefetch-misses (scaled from 24.10%)
      249650965  L1-i$-loads           (scaled from 23.80%)
        3353961  L1-i$-load-misses     (scaled from 23.82%)
         104599  L1-i$-prefetches      (scaled from 23.68%)
        4836405  LLC-loads             (scaled from 23.67%)
         498214  LLC-load-misses       (scaled from 23.66%)
        4953994  LLC-stores            (scaled from 23.64%)
      243354097  dTLB-loads            (scaled from 23.77%)
        6468584  dTLB-load-misses      (scaled from 23.74%)
      249719549  iTLB-loads            (scaled from 23.25%)
           5060  iTLB-load-misses      (scaled from 23.00%)
      112343016  branch-loads          (scaled from 22.76%)
        5528876  branch-load-misses    (scaled from 22.54%)

    0.427154051  seconds time elapsed.

Reported-by : Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1245934522.5308.39.camel@hpdv5.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/parse-events.c | 45 ++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 7939a21..430f060 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -71,23 +71,23 @@ static char *sw_event_names[] = {
 #define MAX_ALIASES 8
 
 static char *hw_cache[][MAX_ALIASES] = {
-	{ "L1-data",		"l1-d",		"l1d"			},
-	{ "L1-instruction",	"l1-i",		"l1i"			},
-	{ "L2",			"l2"					},
-	{ "Data-TLB",		"dtlb",		"d-tlb"			},
-	{ "Instruction-TLB",	"itlb",		"i-tlb"			},
-	{ "Branch",		"bpu" ,		"btb",		"bpc"	},
+ { "L1-d$",	"l1-d",		"L1-data",				},
+ { "L1-i$",	"l1-i",		"L1-instruction",			},
+ { "LLC",	"L2"							},
+ { "dTLB",	"d-tlb",	"Data-TLB",				},
+ { "iTLB",	"i-tlb",	"Instruction-TLB",			},
+ { "branch",	"branches",	"bpu",		"btb",		"bpc",	},
 };
 
 static char *hw_cache_op[][MAX_ALIASES] = {
-	{ "Load",		"read"					},
-	{ "Store",		"write"					},
-	{ "Prefetch",		"speculative-read", "speculative-load"	},
+ { "load",	"loads",	"read",					},
+ { "store",	"stores",	"write",				},
+ { "prefetch",	"prefetches",	"speculative-read", "speculative-load",	},
 };
 
 static char *hw_cache_result[][MAX_ALIASES] = {
-	{ "Reference",		"ops",		"access"		},
-	{ "Miss"							},
+ { "refs",	"Reference",	"ops",		"access",		},
+ { "misses",	"miss",							},
 };
 
 #define C(x)		PERF_COUNT_HW_CACHE_##x
@@ -118,6 +118,22 @@ static int is_cache_op_valid(u8 cache_type, u8 cache_op)
 		return 0;	/* invalid */
 }
 
+static char *event_cache_name(u8 cache_type, u8 cache_op, u8 cache_result)
+{
+	static char name[50];
+
+	if (cache_result) {
+		sprintf(name, "%s-%s-%s", hw_cache[cache_type][0],
+			hw_cache_op[cache_op][0],
+			hw_cache_result[cache_result][0]);
+	} else {
+		sprintf(name, "%s-%s", hw_cache[cache_type][0],
+			hw_cache_op[cache_op][1]);
+	}
+
+	return name;
+}
+
 char *event_name(int counter)
 {
 	u64 config = attrs[counter].config;
@@ -137,7 +153,6 @@ char *event_name(int counter)
 
 	case PERF_TYPE_HW_CACHE: {
 		u8 cache_type, cache_op, cache_result;
-		static char name[100];
 
 		cache_type   = (config >>  0) & 0xff;
 		if (cache_type > PERF_COUNT_HW_CACHE_MAX)
@@ -153,12 +168,8 @@ char *event_name(int counter)
 
 		if (!is_cache_op_valid(cache_type, cache_op))
 			return "invalid-cache";
-		sprintf(name, "%s-Cache-%s-%ses",
-			hw_cache[cache_type][0],
-			hw_cache_op[cache_op][0],
-			hw_cache_result[cache_result][0]);
 
-		return name;
+		return event_cache_name(cache_type, cache_op, cache_result);
 	}
 
 	case PERF_TYPE_SOFTWARE:
-- 
cgit v1.1


From 7c6a1c65bbd3be688e581511f45818663efc1877 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 25 Jun 2009 17:05:54 +0200
Subject: perf_counter tools: Rework the file format

Create a structured file format that includes the full
perf_counter_attr and all its relevant counter IDs so that
the reporting program has full information.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile         |   3 +-
 tools/perf/builtin-record.c | 100 +++++++++++-------
 tools/perf/builtin-report.c |  37 +++++--
 tools/perf/perf.h           |   8 +-
 tools/perf/types.h          |  17 ----
 tools/perf/util/header.c    | 242 ++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/header.h    |  37 +++++++
 tools/perf/util/string.h    |   2 +-
 tools/perf/util/symbol.h    |   2 +-
 tools/perf/util/types.h     |  17 ++++
 10 files changed, 394 insertions(+), 71 deletions(-)
 delete mode 100644 tools/perf/types.h
 create mode 100644 tools/perf/util/header.c
 create mode 100644 tools/perf/util/header.h
 create mode 100644 tools/perf/util/types.h

(limited to 'tools')

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 36d7eef..d3887ed 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -290,7 +290,7 @@ LIB_FILE=libperf.a
 
 LIB_H += ../../include/linux/perf_counter.h
 LIB_H += perf.h
-LIB_H += types.h
+LIB_H += util/types.h
 LIB_H += util/list.h
 LIB_H += util/rbtree.h
 LIB_H += util/levenshtein.h
@@ -328,6 +328,7 @@ LIB_OBJS += util/sigchain.o
 LIB_OBJS += util/symbol.o
 LIB_OBJS += util/color.o
 LIB_OBJS += util/pager.o
+LIB_OBJS += util/header.o
 
 BUILTIN_OBJS += builtin-annotate.o
 BUILTIN_OBJS += builtin-help.o
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 9b899ba..f4f0240 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -14,6 +14,8 @@
 #include "util/parse-events.h"
 #include "util/string.h"
 
+#include "util/header.h"
+
 #include <unistd.h>
 #include <sched.h>
 
@@ -52,7 +54,8 @@ static int			nr_poll;
 static int			nr_cpu;
 
 static int			file_new = 1;
-static struct perf_file_header	file_header;
+
+struct perf_header		*header;
 
 struct mmap_event {
 	struct perf_event_header	header;
@@ -328,7 +331,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
 	fclose(fp);
 }
 
-static void synthesize_samples(void)
+static void synthesize_all(void)
 {
 	DIR *proc;
 	struct dirent dirent, *next;
@@ -352,10 +355,35 @@ static void synthesize_samples(void)
 
 static int group_fd;
 
+static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr)
+{
+	struct perf_header_attr *h_attr;
+
+	if (nr < header->attrs) {
+		h_attr = header->attr[nr];
+	} else {
+		h_attr = perf_header_attr__new(a);
+		perf_header__add_attr(header, h_attr);
+	}
+
+	return h_attr;
+}
+
 static void create_counter(int counter, int cpu, pid_t pid)
 {
 	struct perf_counter_attr *attr = attrs + counter;
-	int track = 1;
+	struct perf_header_attr *h_attr;
+	int track = !counter; /* only the first counter needs these */
+	struct {
+		u64 count;
+		u64 time_enabled;
+		u64 time_running;
+		u64 id;
+	} read_data;
+
+	attr->read_format	= PERF_FORMAT_TOTAL_TIME_ENABLED |
+				  PERF_FORMAT_TOTAL_TIME_RUNNING |
+				  PERF_FORMAT_ID;
 
 	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
 
@@ -368,22 +396,11 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	if (call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
-	if (file_new) {
-		file_header.sample_type = attr->sample_type;
-	} else {
-		if (file_header.sample_type != attr->sample_type) {
-			fprintf(stderr, "incompatible append\n");
-			exit(-1);
-		}
-	}
-
 	attr->mmap		= track;
 	attr->comm		= track;
 	attr->inherit		= (cpu < 0) && inherit;
 	attr->disabled		= 1;
 
-	track = 0; /* only the first counter needs these */
-
 try_again:
 	fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0);
 
@@ -414,6 +431,19 @@ try_again:
 		exit(-1);
 	}
 
+	h_attr = get_header_attr(attr, counter);
+
+	if (!file_new) {
+		if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
+			fprintf(stderr, "incompatible append\n");
+			exit(-1);
+		}
+	}
+
+	read(fd[nr_cpu][counter], &read_data, sizeof(read_data));
+
+	perf_header_attr__add_id(h_attr, read_data.id);
+
 	assert(fd[nr_cpu][counter] >= 0);
 	fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK);
 
@@ -444,11 +474,6 @@ static void open_counters(int cpu, pid_t pid)
 {
 	int counter;
 
-	if (pid > 0) {
-		pid_synthesize_comm_event(pid, 0);
-		pid_synthesize_mmap_samples(pid);
-	}
-
 	group_fd = -1;
 	for (counter = 0; counter < nr_counters; counter++)
 		create_counter(counter, cpu, pid);
@@ -458,17 +483,16 @@ static void open_counters(int cpu, pid_t pid)
 
 static void atexit_header(void)
 {
-	file_header.data_size += bytes_written;
+	header->data_size += bytes_written;
 
-	if (pwrite(output, &file_header, sizeof(file_header), 0) == -1)
-		perror("failed to write on file headers");
+	perf_header__write(header, output);
 }
 
 static int __cmd_record(int argc, const char **argv)
 {
 	int i, counter;
 	struct stat st;
-	pid_t pid;
+	pid_t pid = 0;
 	int flags;
 	int ret;
 
@@ -499,22 +523,31 @@ static int __cmd_record(int argc, const char **argv)
 		exit(-1);
 	}
 
-	if (!file_new) {
-		if (read(output, &file_header, sizeof(file_header)) == -1) {
-			perror("failed to read file headers");
-			exit(-1);
-		}
-
-		lseek(output, file_header.data_size, SEEK_CUR);
-	}
+	if (!file_new)
+		header = perf_header__read(output);
+	else
+		header = perf_header__new();
 
 	atexit(atexit_header);
 
 	if (!system_wide) {
-		open_counters(-1, target_pid != -1 ? target_pid : getpid());
+		pid = target_pid;
+		if (pid == -1)
+			pid = getpid();
+
+		open_counters(-1, pid);
 	} else for (i = 0; i < nr_cpus; i++)
 		open_counters(i, target_pid);
 
+	if (file_new)
+		perf_header__write(header, output);
+
+	if (!system_wide) {
+		pid_synthesize_comm_event(pid, 0);
+		pid_synthesize_mmap_samples(pid);
+	} else
+		synthesize_all();
+
 	if (target_pid == -1 && argc) {
 		pid = fork();
 		if (pid < 0)
@@ -538,9 +571,6 @@ static int __cmd_record(int argc, const char **argv)
 		}
 	}
 
-	if (system_wide)
-		synthesize_samples();
-
 	while (!done) {
 		int hits = samples;
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b4e76f7..e575f30 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -17,6 +17,7 @@
 #include "util/string.h"
 
 #include "perf.h"
+#include "util/header.h"
 
 #include "util/parse-options.h"
 #include "util/parse-events.h"
@@ -1385,13 +1386,27 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	return 0;
 }
 
-static struct perf_file_header		file_header;
+static struct perf_header	*header;
+
+static int perf_header__has_sample(u64 sample_mask)
+{
+	int i;
+
+	for (i = 0; i < header->attrs; i++) {
+		struct perf_header_attr *attr = header->attr[i];
+
+		if (!(attr->attr.sample_type & sample_mask))
+			return 0;
+	}
+
+	return 1;
+}
 
 static int __cmd_report(void)
 {
 	int ret, rc = EXIT_FAILURE;
 	unsigned long offset = 0;
-	unsigned long head = sizeof(file_header);
+	unsigned long head, shift;
 	struct stat stat;
 	event_t *event;
 	uint32_t size;
@@ -1419,13 +1434,11 @@ static int __cmd_report(void)
 		exit(0);
 	}
 
-	if (read(input, &file_header, sizeof(file_header)) == -1) {
-		perror("failed to read file headers");
-		exit(-1);
-	}
+	header = perf_header__read(input);
+	head = header->data_offset;
 
 	if (sort__has_parent &&
-	    !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) {
+	    !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) {
 		fprintf(stderr, "selected --sort parent, but no callchain data\n");
 		exit(-1);
 	}
@@ -1445,6 +1458,11 @@ static int __cmd_report(void)
 		cwd = NULL;
 		cwdlen = 0;
 	}
+
+	shift = page_size * (head / page_size);
+	offset += shift;
+	head -= shift;
+
 remap:
 	buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
 			   MAP_SHARED, input, offset);
@@ -1461,9 +1479,10 @@ more:
 		size = 8;
 
 	if (head + event->header.size >= page_size * mmap_window) {
-		unsigned long shift = page_size * (head / page_size);
 		int ret;
 
+		shift = page_size * (head / page_size);
+
 		ret = munmap(buf, page_size * mmap_window);
 		assert(ret == 0);
 
@@ -1501,7 +1520,7 @@ more:
 
 	head += size;
 
-	if (offset + head >= sizeof(file_header) + file_header.data_size)
+	if (offset + head >= header->data_offset + header->data_size)
 		goto done;
 
 	if (offset + head < stat.st_size)
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index bccb529..16c84fd 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -19,7 +19,7 @@
 #include <sys/syscall.h>
 
 #include "../../include/linux/perf_counter.h"
-#include "types.h"
+#include "util/types.h"
 
 /*
  * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all
@@ -66,10 +66,4 @@ sys_perf_counter_open(struct perf_counter_attr *attr,
 #define MAX_COUNTERS			256
 #define MAX_NR_CPUS			256
 
-struct perf_file_header {
-	u64	version;
-	u64	sample_type;
-	u64	data_size;
-};
-
 #endif
diff --git a/tools/perf/types.h b/tools/perf/types.h
deleted file mode 100644
index 5e75f90..0000000
--- a/tools/perf/types.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _PERF_TYPES_H
-#define _PERF_TYPES_H
-
-/*
- * We define u64 as unsigned long long for every architecture
- * so that we can print it with %Lx without getting warnings.
- */
-typedef unsigned long long u64;
-typedef signed long long   s64;
-typedef unsigned int	   u32;
-typedef signed int	   s32;
-typedef unsigned short	   u16;
-typedef signed short	   s16;
-typedef unsigned char	   u8;
-typedef signed char	   s8;
-
-#endif /* _PERF_TYPES_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
new file mode 100644
index 0000000..450384b
--- /dev/null
+++ b/tools/perf/util/header.c
@@ -0,0 +1,242 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util.h"
+#include "header.h"
+
+/*
+ *
+ */
+
+struct perf_header_attr *perf_header_attr__new(struct perf_counter_attr *attr)
+{
+	struct perf_header_attr *self = malloc(sizeof(*self));
+
+	if (!self)
+		die("nomem");
+
+	self->attr = *attr;
+	self->ids = 0;
+	self->size = 1;
+	self->id = malloc(sizeof(u64));
+
+	if (!self->id)
+		die("nomem");
+
+	return self;
+}
+
+void perf_header_attr__add_id(struct perf_header_attr *self, u64 id)
+{
+	int pos = self->ids;
+
+	self->ids++;
+	if (self->ids > self->size) {
+		self->size *= 2;
+		self->id = realloc(self->id, self->size * sizeof(u64));
+		if (!self->id)
+			die("nomem");
+	}
+	self->id[pos] = id;
+}
+
+/*
+ *
+ */
+
+struct perf_header *perf_header__new(void)
+{
+	struct perf_header *self = malloc(sizeof(*self));
+
+	if (!self)
+		die("nomem");
+
+	self->frozen = 0;
+
+	self->attrs = 0;
+	self->size = 1;
+	self->attr = malloc(sizeof(void *));
+
+	if (!self->attr)
+		die("nomem");
+
+	self->data_offset = 0;
+	self->data_size = 0;
+
+	return self;
+}
+
+void perf_header__add_attr(struct perf_header *self,
+			   struct perf_header_attr *attr)
+{
+	int pos = self->attrs;
+
+	if (self->frozen)
+		die("frozen");
+
+	self->attrs++;
+	if (self->attrs > self->size) {
+		self->size *= 2;
+		self->attr = realloc(self->attr, self->size * sizeof(void *));
+		if (!self->attr)
+			die("nomem");
+	}
+	self->attr[pos] = attr;
+}
+
+static const char *__perf_magic = "PERFFILE";
+
+#define PERF_MAGIC	(*(u64 *)__perf_magic)
+
+struct perf_file_section {
+	u64 offset;
+	u64 size;
+};
+
+struct perf_file_attr {
+	struct perf_counter_attr	attr;
+	struct perf_file_section	ids;
+};
+
+struct perf_file_header {
+	u64				magic;
+	u64				size;
+	u64				attr_size;
+	struct perf_file_section	attrs;
+	struct perf_file_section	data;
+};
+
+static void do_write(int fd, void *buf, size_t size)
+{
+	while (size) {
+		int ret = write(fd, buf, size);
+
+		if (ret < 0)
+			die("failed to write");
+
+		size -= ret;
+		buf += ret;
+	}
+}
+
+void perf_header__write(struct perf_header *self, int fd)
+{
+	struct perf_file_header f_header;
+	struct perf_file_attr   f_attr;
+	struct perf_header_attr	*attr;
+	int i;
+
+	lseek(fd, sizeof(f_header), SEEK_SET);
+
+
+	for (i = 0; i < self->attrs; i++) {
+		attr = self->attr[i];
+
+		attr->id_offset = lseek(fd, 0, SEEK_CUR);
+		do_write(fd, attr->id, attr->ids * sizeof(u64));
+	}
+
+
+	self->attr_offset = lseek(fd, 0, SEEK_CUR);
+
+	for (i = 0; i < self->attrs; i++) {
+		attr = self->attr[i];
+
+		f_attr = (struct perf_file_attr){
+			.attr = attr->attr,
+			.ids  = {
+				.offset = attr->id_offset,
+				.size   = attr->ids * sizeof(u64),
+			}
+		};
+		do_write(fd, &f_attr, sizeof(f_attr));
+	}
+
+
+	self->data_offset = lseek(fd, 0, SEEK_CUR);
+
+	f_header = (struct perf_file_header){
+		.magic	   = PERF_MAGIC,
+		.size	   = sizeof(f_header),
+		.attr_size = sizeof(f_attr),
+		.attrs = {
+			.offset = self->attr_offset,
+			.size   = self->attrs * sizeof(f_attr),
+		},
+		.data = {
+			.offset = self->data_offset,
+			.size	= self->data_size,
+		},
+	};
+
+	lseek(fd, 0, SEEK_SET);
+	do_write(fd, &f_header, sizeof(f_header));
+	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+
+	self->frozen = 1;
+}
+
+static void do_read(int fd, void *buf, size_t size)
+{
+	while (size) {
+		int ret = read(fd, buf, size);
+
+		if (ret < 0)
+			die("failed to read");
+
+		size -= ret;
+		buf += ret;
+	}
+}
+
+struct perf_header *perf_header__read(int fd)
+{
+	struct perf_header	*self = perf_header__new();
+	struct perf_file_header f_header;
+	struct perf_file_attr	f_attr;
+	u64			f_id;
+
+	int nr_attrs, nr_ids, i, j;
+
+	lseek(fd, 0, SEEK_SET);
+	do_read(fd, &f_header, sizeof(f_header));
+
+	if (f_header.magic	!= PERF_MAGIC		||
+	    f_header.size	!= sizeof(f_header)	||
+	    f_header.attr_size	!= sizeof(f_attr))
+		die("incompatible file format");
+
+	nr_attrs = f_header.attrs.size / sizeof(f_attr);
+	lseek(fd, f_header.attrs.offset, SEEK_SET);
+
+	for (i = 0; i < nr_attrs; i++) {
+		struct perf_header_attr *attr;
+		off_t tmp = lseek(fd, 0, SEEK_CUR);
+
+		do_read(fd, &f_attr, sizeof(f_attr));
+
+		attr = perf_header_attr__new(&f_attr.attr);
+
+		nr_ids = f_attr.ids.size / sizeof(u64);
+		lseek(fd, f_attr.ids.offset, SEEK_SET);
+
+		for (j = 0; j < nr_ids; j++) {
+			do_read(fd, &f_id, sizeof(f_id));
+
+			perf_header_attr__add_id(attr, f_id);
+		}
+		perf_header__add_attr(self, attr);
+		lseek(fd, tmp, SEEK_SET);
+	}
+
+	self->data_offset = f_header.data.offset;
+	self->data_size   = f_header.data.size;
+
+	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+
+	self->frozen = 1;
+
+	return self;
+}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
new file mode 100644
index 0000000..b5ef53a
--- /dev/null
+++ b/tools/perf/util/header.h
@@ -0,0 +1,37 @@
+#ifndef _PERF_HEADER_H
+#define _PERF_HEADER_H
+
+#include "../../../include/linux/perf_counter.h"
+#include <sys/types.h>
+#include "types.h"
+
+struct perf_header_attr {
+	struct perf_counter_attr attr;
+	int ids, size;
+	u64 *id;
+	off_t id_offset;
+};
+
+struct perf_header {
+	int frozen;
+	int attrs, size;
+	struct perf_header_attr **attr;
+	off_t attr_offset;
+	u64 data_offset;
+	u64 data_size;
+};
+
+struct perf_header *perf_header__read(int fd);
+void perf_header__write(struct perf_header *self, int fd);
+
+void perf_header__add_attr(struct perf_header *self,
+			   struct perf_header_attr *attr);
+
+struct perf_header_attr *
+perf_header_attr__new(struct perf_counter_attr *attr);
+void perf_header_attr__add_id(struct perf_header_attr *self, u64 id);
+
+
+struct perf_header *perf_header__new(void);
+
+#endif /* _PERF_HEADER_H */
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h
index 37b0325..3dca2f6 100644
--- a/tools/perf/util/string.h
+++ b/tools/perf/util/string.h
@@ -1,7 +1,7 @@
 #ifndef _PERF_STRING_H_
 #define _PERF_STRING_H_
 
-#include "../types.h"
+#include "types.h"
 
 int hex2u64(const char *ptr, u64 *val);
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index ea332e5..940b432 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -2,7 +2,7 @@
 #define _PERF_SYMBOL_ 1
 
 #include <linux/types.h>
-#include "../types.h"
+#include "types.h"
 #include "list.h"
 #include "rbtree.h"
 
diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h
new file mode 100644
index 0000000..5e75f90
--- /dev/null
+++ b/tools/perf/util/types.h
@@ -0,0 +1,17 @@
+#ifndef _PERF_TYPES_H
+#define _PERF_TYPES_H
+
+/*
+ * We define u64 as unsigned long long for every architecture
+ * so that we can print it with %Lx without getting warnings.
+ */
+typedef unsigned long long u64;
+typedef signed long long   s64;
+typedef unsigned int	   u32;
+typedef signed int	   s32;
+typedef unsigned short	   u16;
+typedef signed short	   s16;
+typedef unsigned char	   u8;
+typedef signed char	   s8;
+
+#endif /* _PERF_TYPES_H */
-- 
cgit v1.1


From e6e18ec79b023d5fe84226cef533cf0e3770ce93 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 25 Jun 2009 11:27:12 +0200
Subject: perf_counter: Rework the sample ABI

The PERF_EVENT_READ implementation made me realize we don't
actually need the sample_type int the output sample, since
we already have that in the perf_counter_attr information.

Therefore, remove the PERF_EVENT_MISC_OVERFLOW bit and the
event->type overloading, and imply put counter overflow
samples in a PERF_EVENT_SAMPLE type.

This also fixes the issue that event->type was only 32-bit
and sample_type had 64 usable bits.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-annotate.c |  8 ++++----
 tools/perf/builtin-report.c   | 32 +++++++++++++++++++-------------
 tools/perf/builtin-top.c      | 11 ++++++-----
 3 files changed, 29 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 7e58e3a..722c0f5 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -855,7 +855,7 @@ static unsigned long total = 0,
 		     total_unknown = 0;
 
 static int
-process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	char level;
 	int show = 0;
@@ -1013,10 +1013,10 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head)
 static int
 process_event(event_t *event, unsigned long offset, unsigned long head)
 {
-	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
-		return process_overflow_event(event, offset, head);
-
 	switch (event->header.type) {
+	case PERF_EVENT_SAMPLE:
+		return process_sample_event(event, offset, head);
+
 	case PERF_EVENT_MMAP:
 		return process_mmap_event(event, offset, head);
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index e575f30..ec5361c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -53,6 +53,8 @@ static regex_t		parent_regex;
 
 static int		exclude_other = 1;
 
+static u64		sample_type;
+
 struct ip_event {
 	struct perf_event_header header;
 	u64 ip;
@@ -1135,7 +1137,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event)
 }
 
 static int
-process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
+process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	char level;
 	int show = 0;
@@ -1147,12 +1149,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 	void *more_data = event->ip.__more_data;
 	struct ip_callchain *chain = NULL;
 
-	if (event->header.type & PERF_SAMPLE_PERIOD) {
+	if (sample_type & PERF_SAMPLE_PERIOD) {
 		period = *(u64 *)more_data;
 		more_data += sizeof(u64);
 	}
 
-	dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n",
+	dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->header.misc,
@@ -1160,7 +1162,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
 		(void *)(long)ip,
 		(long long)period);
 
-	if (event->header.type & PERF_SAMPLE_CALLCHAIN) {
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
 		int i;
 
 		chain = (void *)more_data;
@@ -1352,10 +1354,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	trace_event(event);
 
-	if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
-		return process_overflow_event(event, offset, head);
-
 	switch (event->header.type) {
+	case PERF_EVENT_SAMPLE:
+		return process_sample_event(event, offset, head);
+
 	case PERF_EVENT_MMAP:
 		return process_mmap_event(event, offset, head);
 
@@ -1388,18 +1390,21 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 
 static struct perf_header	*header;
 
-static int perf_header__has_sample(u64 sample_mask)
+static u64 perf_header__sample_type(void)
 {
+	u64 sample_type = 0;
 	int i;
 
 	for (i = 0; i < header->attrs; i++) {
 		struct perf_header_attr *attr = header->attr[i];
 
-		if (!(attr->attr.sample_type & sample_mask))
-			return 0;
+		if (!sample_type)
+			sample_type = attr->attr.sample_type;
+		else if (sample_type != attr->attr.sample_type)
+			die("non matching sample_type");
 	}
 
-	return 1;
+	return sample_type;
 }
 
 static int __cmd_report(void)
@@ -1437,8 +1442,9 @@ static int __cmd_report(void)
 	header = perf_header__read(input);
 	head = header->data_offset;
 
-	if (sort__has_parent &&
-	    !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) {
+	sample_type = perf_header__sample_type();
+
+	if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
 		fprintf(stderr, "selected --sort parent, but no callchain data\n");
 		exit(-1);
 	}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5352b5e..cf0d21f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -392,11 +392,11 @@ static void record_ip(u64 ip, int counter)
 	samples--;
 }
 
-static void process_event(u64 ip, int counter)
+static void process_event(u64 ip, int counter, int user)
 {
 	samples++;
 
-	if (ip < min_ip || ip > max_ip) {
+	if (user) {
 		userspace_samples++;
 		return;
 	}
@@ -509,9 +509,10 @@ static void mmap_read_counter(struct mmap_data *md)
 
 		old += size;
 
-		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
-			if (event->header.type & PERF_SAMPLE_IP)
-				process_event(event->ip.ip, md->counter);
+		if (event->header.type == PERF_EVENT_SAMPLE) {
+			int user =
+	(event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER;
+			process_event(event->ip.ip, md->counter, user);
 		}
 	}
 
-- 
cgit v1.1


From 649c48a9e7fafcc72bfcc99471d9dea98d789d59 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 24 Jun 2009 21:12:48 +0200
Subject: perf-report: Add modes for inherited stats and no-samples

Now that we can collect per task statistics, add modes that
make use of that facility.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f4f0240..798a56d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -41,6 +41,8 @@ static int			force				= 0;
 static int			append_file			= 0;
 static int			call_graph			= 0;
 static int			verbose				= 0;
+static int			inherit_stat			= 0;
+static int			no_samples			= 0;
 
 static long			samples;
 static struct timeval		last_read;
@@ -393,6 +395,12 @@ static void create_counter(int counter, int cpu, pid_t pid)
 		attr->sample_freq	= freq;
 	}
 
+	if (no_samples)
+		attr->sample_freq = 0;
+
+	if (inherit_stat)
+		attr->inherit_stat = 1;
+
 	if (call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
@@ -571,7 +579,7 @@ static int __cmd_record(int argc, const char **argv)
 		}
 	}
 
-	while (!done) {
+	for (;;) {
 		int hits = samples;
 
 		for (i = 0; i < nr_cpu; i++) {
@@ -579,8 +587,11 @@ static int __cmd_record(int argc, const char **argv)
 				mmap_read(&mmap_array[i][counter]);
 		}
 
-		if (hits == samples)
+		if (hits == samples) {
+			if (done)
+				break;
 			ret = poll(event_array, nr_poll, 100);
+		}
 	}
 
 	/*
@@ -629,6 +640,10 @@ static const struct option options[] = {
 		    "do call-graph (stack chain/backtrace) recording"),
 	OPT_BOOLEAN('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
+	OPT_BOOLEAN('s', "stat", &inherit_stat,
+		    "per thread counts"),
+	OPT_BOOLEAN('n', "no-samples", &no_samples,
+		    "don't sample"),
 	OPT_END()
 };
 
-- 
cgit v1.1


From e9ea2fde7a07ae60a119171a2946ed2ae778271e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 24 Jun 2009 22:46:04 +0200
Subject: perf-report: Add bare minimum PERF_EVENT_READ parsing

Provide the basic infrastructure to provide per task stats.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-report.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'tools')

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ec5361c..681c223 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -100,6 +100,13 @@ struct lost_event {
 	u64 lost;
 };
 
+struct read_event {
+	struct perf_event_header header;
+	u32 pid,tid;
+	u64 value;
+	u64 format[3];
+};
+
 typedef union event_union {
 	struct perf_event_header	header;
 	struct ip_event			ip;
@@ -108,6 +115,7 @@ typedef union event_union {
 	struct fork_event		fork;
 	struct period_event		period;
 	struct lost_event		lost;
+	struct read_event		read;
 } event_t;
 
 static LIST_HEAD(dsos);
@@ -1350,6 +1358,19 @@ static void trace_event(event_t *event)
 }
 
 static int
+process_read_event(event_t *event, unsigned long offset, unsigned long head)
+{
+	dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n",
+			(void *)(offset + head),
+			(void *)(long)(event->header.size),
+			event->read.pid,
+			event->read.tid,
+			event->read.value);
+
+	return 0;
+}
+
+static int
 process_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	trace_event(event);
@@ -1373,6 +1394,9 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 	case PERF_EVENT_LOST:
 		return process_lost_event(event, offset, head);
 
+	case PERF_EVENT_READ:
+		return process_read_event(event, offset, head);
+
 	/*
 	 * We dont process them right now but they are fine:
 	 */
-- 
cgit v1.1


From 4418351f06d9ce73acc846158c20186965f920f3 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Thu, 25 Jun 2009 21:27:42 +0530
Subject: perf_counter tools: Add alias for 'l1d' and 'l1i'

Add 'l1d' and 'l1i' aliases again as shortcuts - just dont make them
the primary display alias.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1245945462.9157.11.camel@hpdv5.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/parse-events.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 430f060..4d042f1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -71,8 +71,8 @@ static char *sw_event_names[] = {
 #define MAX_ALIASES 8
 
 static char *hw_cache[][MAX_ALIASES] = {
- { "L1-d$",	"l1-d",		"L1-data",				},
- { "L1-i$",	"l1-i",		"L1-instruction",			},
+ { "L1-d$",	"l1-d",		"l1d",		"L1-data",		},
+ { "L1-i$",	"l1-i",		"l1i",		"L1-instruction",	},
  { "LLC",	"L2"							},
  { "dTLB",	"d-tlb",	"Data-TLB",				},
  { "iTLB",	"i-tlb",	"Instruction-TLB",			},
-- 
cgit v1.1


From 3928ddbe994cce1da1b6365b0db04d5765f254f4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 25 Jun 2009 22:21:27 +0200
Subject: perf record: Fix unhandled io return value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Building latest perfcounter fails on the following error:

 builtin-record.c: In function ‘create_counter’:
 builtin-record.c:451: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result
 make: *** [builtin-record.o] Erreur 1

Just check if we successfully read the perf file descriptor.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1245961287-5327-1-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-record.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 798a56d..d18546f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -448,7 +448,10 @@ try_again:
 		}
 	}
 
-	read(fd[nr_cpu][counter], &read_data, sizeof(read_data));
+	if (read(fd[nr_cpu][counter], &read_data, sizeof(read_data)) == -1) {
+		perror("Unable to read perf file descriptor\n");
+		exit(-1);
+	}
 
 	perf_header_attr__add_id(h_attr, read_data.id);
 
-- 
cgit v1.1


From 8cb76d99d715741637b6d0884f389e17e9cb05d2 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 26 Jun 2009 16:28:00 +0200
Subject: perf_counter tools: Prepare a small callchain framework

We plan to display the callchains depending on some user-configurable
parameters.

To gather the callchains stats from the recorded stream in a fast way,
this patch introduces an ad hoc radix tree adapted for callchains and also
a rbtree to sort these callchains once we have gathered every events
from the stream.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1246026481-8314-2-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Makefile         |   1 +
 tools/perf/builtin-report.c |   5 --
 tools/perf/perf.h           |   5 ++
 tools/perf/util/callchain.c | 174 ++++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/callchain.h |  33 +++++++++
 5 files changed, 213 insertions(+), 5 deletions(-)
 create mode 100644 tools/perf/util/callchain.c
 create mode 100644 tools/perf/util/callchain.h

(limited to 'tools')

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index d3887ed..1c1296d 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -329,6 +329,7 @@ LIB_OBJS += util/symbol.o
 LIB_OBJS += util/color.o
 LIB_OBJS += util/pager.o
 LIB_OBJS += util/header.o
+LIB_OBJS += util/callchain.o
 
 BUILTIN_OBJS += builtin-annotate.o
 BUILTIN_OBJS += builtin-help.o
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 681c223..28d1cb2 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -62,11 +62,6 @@ struct ip_event {
 	unsigned char __more_data[];
 };
 
-struct ip_callchain {
-	u64 nr;
-	u64 ips[0];
-};
-
 struct mmap_event {
 	struct perf_event_header header;
 	u32 pid, tid;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 16c84fd..a49842b 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -66,4 +66,9 @@ sys_perf_counter_open(struct perf_counter_attr *attr,
 #define MAX_COUNTERS			256
 #define MAX_NR_CPUS			256
 
+struct ip_callchain {
+	u64 nr;
+	u64 ips[0];
+};
+
 #endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
new file mode 100644
index 0000000..ad3c285
--- /dev/null
+++ b/tools/perf/util/callchain.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Handle the callchains from the stream in an ad-hoc radix tree and then
+ * sort them in an rbtree.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include "callchain.h"
+
+
+static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct callchain_node *rnode;
+
+	while (*p) {
+		parent = *p;
+		rnode = rb_entry(parent, struct callchain_node, rb_node);
+
+		if (rnode->hit < chain->hit)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+
+	rb_link_node(&chain->rb_node, parent, p);
+	rb_insert_color(&chain->rb_node, root);
+}
+
+/*
+ * Once we get every callchains from the stream, we can now
+ * sort them by hit
+ */
+void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node)
+{
+	struct callchain_node *child;
+
+	list_for_each_entry(child, &node->children, brothers)
+		sort_chain_to_rbtree(rb_root, child);
+
+	if (node->hit)
+		rb_insert_callchain(rb_root, node);
+}
+
+static struct callchain_node *create_child(struct callchain_node *parent)
+{
+	struct callchain_node *new;
+
+	new = malloc(sizeof(*new));
+	if (!new) {
+		perror("not enough memory to create child for code path tree");
+		return NULL;
+	}
+	new->parent = parent;
+	INIT_LIST_HEAD(&new->children);
+	INIT_LIST_HEAD(&new->val);
+	list_add_tail(&new->brothers, &parent->children);
+
+	return new;
+}
+
+static void
+fill_node(struct callchain_node *node, struct ip_callchain *chain, int start)
+{
+	int i;
+
+	for (i = start; i < chain->nr; i++) {
+		struct callchain_list *call;
+
+		call = malloc(sizeof(*chain));
+		if (!call) {
+			perror("not enough memory for the code path tree");
+			return;
+		}
+		call->ip = chain->ips[i];
+		list_add_tail(&call->list, &node->val);
+	}
+	node->val_nr = i - start;
+}
+
+static void add_child(struct callchain_node *parent, struct ip_callchain *chain)
+{
+	struct callchain_node *new;
+
+	new = create_child(parent);
+	fill_node(new, chain, parent->val_nr);
+
+	new->hit = 1;
+}
+
+static void
+split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
+		struct callchain_list *to_split, int idx)
+{
+	struct callchain_node *new;
+
+	/* split */
+	new = create_child(parent);
+	list_move_tail(&to_split->list, &new->val);
+	new->hit = parent->hit;
+	parent->hit = 0;
+	parent->val_nr = idx;
+
+	/* create the new one */
+	add_child(parent, chain);
+}
+
+static int
+__append_chain(struct callchain_node *root, struct ip_callchain *chain,
+		int start);
+
+static int
+__append_chain_children(struct callchain_node *root, struct ip_callchain *chain)
+{
+	struct callchain_node *rnode;
+
+	/* lookup in childrens */
+	list_for_each_entry(rnode, &root->children, brothers) {
+		int ret = __append_chain(rnode, chain, root->val_nr);
+		if (!ret)
+			return 0;
+	}
+	return -1;
+}
+
+static int
+__append_chain(struct callchain_node *root, struct ip_callchain *chain,
+		int start)
+{
+	struct callchain_list *cnode;
+	int i = start;
+	bool found = false;
+
+	/* lookup in the current node */
+	list_for_each_entry(cnode, &root->val, list) {
+		if (cnode->ip != chain->ips[i++])
+			break;
+		if (!found)
+			found = true;
+		if (i == chain->nr)
+			break;
+	}
+
+	/* matches not, relay on the parent */
+	if (!found)
+		return -1;
+
+	/* we match only a part of the node. Split it and add the new chain */
+	if (i < root->val_nr) {
+		split_add_child(root, chain, cnode, i);
+		return 0;
+	}
+
+	/* we match 100% of the path, increment the hit */
+	if (i == root->val_nr) {
+		root->hit++;
+		return 0;
+	}
+
+	return __append_chain_children(root, chain);
+}
+
+void append_chain(struct callchain_node *root, struct ip_callchain *chain)
+{
+	if (__append_chain_children(root, chain) == -1)
+		add_child(root, chain);
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
new file mode 100644
index 0000000..fa1cd2f
--- /dev/null
+++ b/tools/perf/util/callchain.h
@@ -0,0 +1,33 @@
+#ifndef __PERF_CALLCHAIN_H
+#define __PERF_CALLCHAIN_H
+
+#include "../perf.h"
+#include "list.h"
+#include "rbtree.h"
+
+
+struct callchain_node {
+	struct callchain_node	*parent;
+	struct list_head	brothers;
+	struct list_head 	children;
+	struct list_head 	val;
+	struct rb_node		rb_node;
+	int			val_nr;
+	int			hit;
+};
+
+struct callchain_list {
+	unsigned long		ip;
+	struct list_head	list;
+};
+
+static inline void callchain_init(struct callchain_node *node)
+{
+	INIT_LIST_HEAD(&node->brothers);
+	INIT_LIST_HEAD(&node->children);
+	INIT_LIST_HEAD(&node->val);
+}
+
+void append_chain(struct callchain_node *root, struct ip_callchain *chain);
+void sort_chain_to_rbtree(struct rb_root *rb_root, struct callchain_node *node);
+#endif
-- 
cgit v1.1


From f55c555226b1010b249730ec6b232e5470286950 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 26 Jun 2009 16:28:01 +0200
Subject: perf report: Print sorted callchains per histogram entries

Use the newly created callchains radix tree to gather the chains stats
from the recorded events and then print the callchains for all of them,
sorted by hits, using the "-c" parameter with perf report.

Example:

 66.15%  [k] atm_clip_exit
            63.08%
                0xffffffffffffff80
                0xffffffff810196a8
                0xffffffff810c14c8
                0xffffffff8101a79c
                0xffffffff810194f3
                0xffffffff8106ab7f
                0xffffffff8106abe5
                0xffffffff8106acde
                0xffffffff8100d94b
                0xffffffff8153e7ea
                [...]

             1.54%
                0xffffffffffffff80
                0xffffffff810196a8
                0xffffffff810c14c8
                0xffffffff8101a79c
		[...]

Symbols are not yet resolved.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1246026481-8314-3-git-send-email-fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-report.c | 82 +++++++++++++++++++++++++++++++++++++++------
 1 file changed, 71 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 28d1cb2..ed391db 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -15,6 +15,7 @@
 #include "util/rbtree.h"
 #include "util/symbol.h"
 #include "util/string.h"
+#include "util/callchain.h"
 
 #include "perf.h"
 #include "util/header.h"
@@ -52,6 +53,7 @@ static char		*parent_pattern = default_parent_pattern;
 static regex_t		parent_regex;
 
 static int		exclude_other = 1;
+static int		callchain;
 
 static u64		sample_type;
 
@@ -488,17 +490,19 @@ static size_t threads__fprintf(FILE *fp)
 static struct rb_root hist;
 
 struct hist_entry {
-	struct rb_node	 rb_node;
-
-	struct thread	 *thread;
-	struct map	 *map;
-	struct dso	 *dso;
-	struct symbol	 *sym;
-	struct symbol	 *parent;
-	u64		 ip;
-	char		 level;
-
-	u64		 count;
+	struct rb_node		rb_node;
+
+	struct thread		*thread;
+	struct map		*map;
+	struct dso		*dso;
+	struct symbol		*sym;
+	struct symbol		*parent;
+	u64			ip;
+	char			level;
+	struct callchain_node	callchain;
+	struct rb_root		sorted_chain;
+
+	u64			count;
 };
 
 /*
@@ -769,6 +773,48 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 }
 
 static size_t
+callchain__fprintf(FILE *fp, struct callchain_node *self, u64 total_samples)
+{
+	struct callchain_list *chain;
+	size_t ret = 0;
+
+	if (!self)
+		return 0;
+
+	ret += callchain__fprintf(fp, self->parent, total_samples);
+
+
+	list_for_each_entry(chain, &self->val, list)
+		ret += fprintf(fp, "                %p\n", (void *)chain->ip);
+
+	return ret;
+}
+
+static size_t
+hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self,
+			      u64 total_samples)
+{
+	struct rb_node *rb_node;
+	struct callchain_node *chain;
+	size_t ret = 0;
+
+	rb_node = rb_first(&self->sorted_chain);
+	while (rb_node) {
+		double percent;
+
+		chain = rb_entry(rb_node, struct callchain_node, rb_node);
+		percent = chain->hit * 100.0 / total_samples;
+		ret += fprintf(fp, "           %6.2f%%\n", percent);
+		ret += callchain__fprintf(fp, chain, total_samples);
+		ret += fprintf(fp, "\n");
+		rb_node = rb_next(rb_node);
+	}
+
+	return ret;
+}
+
+
+static size_t
 hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
 {
 	struct sort_entry *se;
@@ -808,6 +854,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples)
 
 	ret += fprintf(fp, "\n");
 
+	if (callchain)
+		hist_entry_callchain__fprintf(fp, self, total_samples);
+
 	return ret;
 }
 
@@ -892,6 +941,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 		.level	= level,
 		.count	= count,
 		.parent = NULL,
+		.sorted_chain = RB_ROOT
 	};
 	int cmp;
 
@@ -934,6 +984,8 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 
 		if (!cmp) {
 			he->count += count;
+			if (callchain)
+				append_chain(&he->callchain, chain);
 			return 0;
 		}
 
@@ -947,6 +999,10 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso,
 	if (!he)
 		return -ENOMEM;
 	*he = entry;
+	if (callchain) {
+		callchain_init(&he->callchain);
+		append_chain(&he->callchain, chain);
+	}
 	rb_link_node(&he->rb_node, parent, p);
 	rb_insert_color(&he->rb_node, &hist);
 
@@ -1023,6 +1079,9 @@ static void output__insert_entry(struct hist_entry *he)
 	struct rb_node *parent = NULL;
 	struct hist_entry *iter;
 
+	if (callchain)
+		sort_chain_to_rbtree(&he->sorted_chain, &he->callchain);
+
 	while (*p != NULL) {
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node);
@@ -1599,6 +1658,7 @@ static const struct option options[] = {
 		   "regex filter to identify parent, see: '--sort parent'"),
 	OPT_BOOLEAN('x', "exclude-other", &exclude_other,
 		    "Only display entries with parent-match"),
+	OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"),
 	OPT_END()
 };
 
-- 
cgit v1.1


From fde953c1c67986e1c381fa50d8207b1578b5cefa Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 27 Jun 2009 06:06:39 +0200
Subject: perf_counter tools: Remove dead code

Vince Weaver reported that there's a handful of #ifdef __MINGW32__
sections in the code.

Remove them as they are in essence dead code - as unlike upstream
Git, the perf tool is unlikely to be ported to Windows.

Reported-by: Vince Weaver <vince@deater.net>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/help.c        | 15 -------
 tools/perf/util/pager.c       |  5 +--
 tools/perf/util/run-command.c | 95 +------------------------------------------
 tools/perf/util/run-command.h |  5 ---
 tools/perf/util/util.h        | 15 -------
 5 files changed, 3 insertions(+), 132 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c
index 6653f7d..17a00e0 100644
--- a/tools/perf/util/help.c
+++ b/tools/perf/util/help.c
@@ -126,21 +126,6 @@ static int is_executable(const char *name)
 	    !S_ISREG(st.st_mode))
 		return 0;
 
-#ifdef __MINGW32__
-	/* cannot trust the executable bit, peek into the file instead */
-	char buf[3] = { 0 };
-	int n;
-	int fd = open(name, O_RDONLY);
-	st.st_mode &= ~S_IXUSR;
-	if (fd >= 0) {
-		n = read(fd, buf, 2);
-		if (n == 2)
-			/* DOS executables start with "MZ" */
-			if (!strcmp(buf, "#!") || !strcmp(buf, "MZ"))
-				st.st_mode |= S_IXUSR;
-		close(fd);
-	}
-#endif
 	return st.st_mode & S_IXUSR;
 }
 
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
index a28bcca..1915de2 100644
--- a/tools/perf/util/pager.c
+++ b/tools/perf/util/pager.c
@@ -9,7 +9,6 @@
 
 static int spawned_pager;
 
-#ifndef __MINGW32__
 static void pager_preexec(void)
 {
 	/*
@@ -24,7 +23,6 @@ static void pager_preexec(void)
 
 	setenv("LESS", "FRSX", 0);
 }
-#endif
 
 static const char *pager_argv[] = { "sh", "-c", NULL, NULL };
 static struct child_process pager_process;
@@ -70,9 +68,8 @@ void setup_pager(void)
 	pager_argv[2] = pager;
 	pager_process.argv = pager_argv;
 	pager_process.in = -1;
-#ifndef __MINGW32__
 	pager_process.preexec_cb = pager_preexec;
-#endif
+
 	if (start_command(&pager_process))
 		return;
 
diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c
index b2f5e85..a393534 100644
--- a/tools/perf/util/run-command.c
+++ b/tools/perf/util/run-command.c
@@ -65,7 +65,6 @@ int start_command(struct child_process *cmd)
 		cmd->err = fderr[0];
 	}
 
-#ifndef __MINGW32__
 	fflush(NULL);
 	cmd->pid = fork();
 	if (!cmd->pid) {
@@ -118,71 +117,6 @@ int start_command(struct child_process *cmd)
 		}
 		exit(127);
 	}
-#else
-	int s0 = -1, s1 = -1, s2 = -1;	/* backups of stdin, stdout, stderr */
-	const char **sargv = cmd->argv;
-	char **env = environ;
-
-	if (cmd->no_stdin) {
-		s0 = dup(0);
-		dup_devnull(0);
-	} else if (need_in) {
-		s0 = dup(0);
-		dup2(fdin[0], 0);
-	} else if (cmd->in) {
-		s0 = dup(0);
-		dup2(cmd->in, 0);
-	}
-
-	if (cmd->no_stderr) {
-		s2 = dup(2);
-		dup_devnull(2);
-	} else if (need_err) {
-		s2 = dup(2);
-		dup2(fderr[1], 2);
-	}
-
-	if (cmd->no_stdout) {
-		s1 = dup(1);
-		dup_devnull(1);
-	} else if (cmd->stdout_to_stderr) {
-		s1 = dup(1);
-		dup2(2, 1);
-	} else if (need_out) {
-		s1 = dup(1);
-		dup2(fdout[1], 1);
-	} else if (cmd->out > 1) {
-		s1 = dup(1);
-		dup2(cmd->out, 1);
-	}
-
-	if (cmd->dir)
-		die("chdir in start_command() not implemented");
-	if (cmd->env) {
-		env = copy_environ();
-		for (; *cmd->env; cmd->env++)
-			env = env_setenv(env, *cmd->env);
-	}
-
-	if (cmd->perf_cmd) {
-		cmd->argv = prepare_perf_cmd(cmd->argv);
-	}
-
-	cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env);
-
-	if (cmd->env)
-		free_environ(env);
-	if (cmd->perf_cmd)
-		free(cmd->argv);
-
-	cmd->argv = sargv;
-	if (s0 >= 0)
-		dup2(s0, 0), close(s0);
-	if (s1 >= 0)
-		dup2(s1, 1), close(s1);
-	if (s2 >= 0)
-		dup2(s2, 2), close(s2);
-#endif
 
 	if (cmd->pid < 0) {
 		int err = errno;
@@ -288,14 +222,6 @@ int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const
 	return run_command(&cmd);
 }
 
-#ifdef __MINGW32__
-static __stdcall unsigned run_thread(void *data)
-{
-	struct async *async = data;
-	return async->proc(async->fd_for_proc, async->data);
-}
-#endif
-
 int start_async(struct async *async)
 {
 	int pipe_out[2];
@@ -304,7 +230,6 @@ int start_async(struct async *async)
 		return error("cannot create pipe: %s", strerror(errno));
 	async->out = pipe_out[0];
 
-#ifndef __MINGW32__
 	/* Flush stdio before fork() to avoid cloning buffers */
 	fflush(NULL);
 
@@ -319,33 +244,17 @@ int start_async(struct async *async)
 		exit(!!async->proc(pipe_out[1], async->data));
 	}
 	close(pipe_out[1]);
-#else
-	async->fd_for_proc = pipe_out[1];
-	async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL);
-	if (!async->tid) {
-		error("cannot create thread: %s", strerror(errno));
-		close_pair(pipe_out);
-		return -1;
-	}
-#endif
+
 	return 0;
 }
 
 int finish_async(struct async *async)
 {
-#ifndef __MINGW32__
 	int ret = 0;
 
 	if (wait_or_whine(async->pid))
 		ret = error("waitpid (async) failed");
-#else
-	DWORD ret = 0;
-	if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0)
-		ret = error("waiting for thread failed: %lu", GetLastError());
-	else if (!GetExitCodeThread(async->tid, &ret))
-		ret = error("cannot get thread exit code: %lu", GetLastError());
-	CloseHandle(async->tid);
-#endif
+
 	return ret;
 }
 
diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h
index 328289f..cc1837d 100644
--- a/tools/perf/util/run-command.h
+++ b/tools/perf/util/run-command.h
@@ -79,12 +79,7 @@ struct async {
 	int (*proc)(int fd, void *data);
 	void *data;
 	int out;	/* caller reads from here and closes it */
-#ifndef __MINGW32__
 	pid_t pid;
-#else
-	HANDLE tid;
-	int fd_for_proc;
-#endif
 };
 
 int start_async(struct async *async);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index b8cfed7..b4be607 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -67,7 +67,6 @@
 #include <assert.h>
 #include <regex.h>
 #include <utime.h>
-#ifndef __MINGW32__
 #include <sys/wait.h>
 #include <sys/poll.h>
 #include <sys/socket.h>
@@ -81,20 +80,6 @@
 #include <netdb.h>
 #include <pwd.h>
 #include <inttypes.h>
-#if defined(__CYGWIN__)
-#undef _XOPEN_SOURCE
-#include <grp.h>
-#define _XOPEN_SOURCE 600
-#include "compat/cygwin.h"
-#else
-#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */
-#include <grp.h>
-#define _ALL_SOURCE 1
-#endif
-#else 	/* __MINGW32__ */
-/* pull in Windows compatibility stuff */
-#include "compat/mingw.h"
-#endif	/* __MINGW32__ */
 
 #ifndef NO_ICONV
 #include <iconv.h>
-- 
cgit v1.1


From 0cfb7a13b8e4e0afd4b856156ab16a182de7505b Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 27 Jun 2009 06:10:30 +0200
Subject: perf stat: Add -n/--null option to run without counters

Allow a no-counters run. This can be useful to measure just
elapsed wall-clock time - or to assess the raw overhead of perf
stat itself, without running any counters.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8420ec5..cdcd058 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -70,6 +70,7 @@ static int			run_count			=  1;
 static int			inherit				=  1;
 static int			scale				=  1;
 static int			target_pid			= -1;
+static int			null_run			=  0;
 
 static int			fd[MAX_NR_CPUS][MAX_COUNTERS];
 
@@ -461,6 +462,8 @@ static const struct option options[] = {
 		    "be more verbose (show counter open errors, etc)"),
 	OPT_INTEGER('r', "repeat", &run_count,
 		    "repeat command and print average + stddev (max: 100)"),
+	OPT_BOOLEAN('n', "null", &null_run,
+		    "null run - dont start any counters"),
 	OPT_END()
 };
 
@@ -476,7 +479,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
 	if (run_count <= 0 || run_count > MAX_RUN)
 		usage_with_options(stat_usage, options);
 
-	if (!nr_counters)
+	if (!null_run && !nr_counters)
 		nr_counters = 8;
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-- 
cgit v1.1


From 566747e6298289c5cb02d4939cb3abf1c4fe7e5a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 27 Jun 2009 06:24:32 +0200
Subject: perf stat: Fix multi-run stats

In multi-run (-r/--repeat) printouts, print out the noise of
the wall-clock average as well.

Also, fix a bug in printing out scaled counters: if it was not
scaled then we should not update the average with -1.

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index cdcd058..52c176c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -353,8 +353,11 @@ static void calc_avg(void)
 				event_res_avg[j]+1, event_res[i][j]+1);
 			update_avg("counter/2", j,
 				event_res_avg[j]+2, event_res[i][j]+2);
-			update_avg("scaled", j,
-				event_scaled_avg + j, event_scaled[i]+j);
+			if (event_scaled[i][j] != -1)
+				update_avg("scaled", j,
+					event_scaled_avg + j, event_scaled[i]+j);
+			else
+				event_scaled_avg[j] = -1;
 		}
 	}
 	runtime_nsecs_avg /= run_count;
@@ -420,9 +423,13 @@ static void print_stat(int argc, const char **argv)
 
 
 	fprintf(stderr, "\n");
-	fprintf(stderr, " %14.9f  seconds time elapsed.\n",
+	fprintf(stderr, " %14.9f  seconds time elapsed",
 			(double)walltime_nsecs_avg/1e9);
-	fprintf(stderr, "\n");
+	if (run_count > 1) {
+		fprintf(stderr, "   ( +- %7.3f%% )",
+			100.0*(double)walltime_nsecs_noise/(double)walltime_nsecs_avg);
+	}
+	fprintf(stderr, "\n\n");
 }
 
 static volatile int signr = -1;
-- 
cgit v1.1


From 6e750a8fc009fd0ae98704525d1d8e80d60e8cc9 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sat, 27 Jun 2009 03:02:07 +0530
Subject: perf stat: Improve output

Increase size for event name to handle bigger names like
'L1-d$-prefetch-misses'

Changed scaled counters from percentage to a multiplicative
factor because the latter is more expressive.

Also aligned the scaling factor, otherwise sometimes it looks
like:

            384  iTLB-load-misses           (4.74x scaled)
         452029  branch-loads               (8.00x scaled)
           5892  branch-load-misses         (20.39x scaled)
         972315  iTLB-loads                 (3.24x scaled)

Before:
         150708  L1-d$-stores          (scaled from 23.57%)
         428804  L1-d$-prefetches      (scaled from 23.47%)
         314446  L1-d$-prefetch-misses  (scaled from 23.42%)
      252626137  L1-i$-loads           (scaled from 23.24%)
        5297550  dTLB-load-misses      (scaled from 23.96%)
      106992392  branch-loads          (scaled from 23.67%)
        5239561  branch-load-misses    (scaled from 23.43%)

After:
        1731713  L1-d$-loads               (  14.25x scaled)
          44241  L1-d$-prefetches          (   3.88x scaled)
          21076  L1-d$-prefetch-misses     (   3.40x scaled)
        5789421  L1-i$-loads               (   3.78x scaled)
          29645  dTLB-load-misses          (   2.95x scaled)
         461474  branch-loads              (   6.52x scaled)
           7493  branch-load-misses        (  26.57x scaled)

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1246051927.2988.10.camel@hpdv5.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 52c176c..3840a70 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -32,6 +32,7 @@
  *   Wu Fengguang <fengguang.wu@intel.com>
  *   Mike Galbraith <efault@gmx.de>
  *   Paul Mackerras <paulus@samba.org>
+ *   Jaswinder Singh Rajput <jaswinder@kernel.org>
  *
  * Released under the GPL v2. (and only v2, not any later version)
  */
@@ -251,7 +252,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
 {
 	double msecs = (double)count[0] / 1000000;
 
-	fprintf(stderr, " %14.6f  %-20s", msecs, event_name(counter));
+	fprintf(stderr, " %14.6f  %-24s", msecs, event_name(counter));
 
 	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
 		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
@@ -265,7 +266,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
 
 static void abs_printout(int counter, u64 *count, u64 *noise)
 {
-	fprintf(stderr, " %14Ld  %-20s", count[0], event_name(counter));
+	fprintf(stderr, " %14Ld  %-24s", count[0], event_name(counter));
 
 	if (runtime_cycles_avg &&
 		attrs[counter].type == PERF_TYPE_HARDWARE &&
@@ -295,7 +296,7 @@ static void print_counter(int counter)
 	scaled = event_scaled_avg[counter];
 
 	if (scaled == -1) {
-		fprintf(stderr, " %14s  %-20s\n",
+		fprintf(stderr, " %14s  %-24s\n",
 			"<not counted>", event_name(counter));
 		return;
 	}
@@ -306,8 +307,7 @@ static void print_counter(int counter)
 		abs_printout(counter, count, noise);
 
 	if (scaled)
-		fprintf(stderr, "  (scaled from %.2f%%)",
-			(double) count[2] / count[1] * 100);
+		fprintf(stderr, "  (%7.2fx scaled)", (double)count[1]/count[2]);
 
 	fprintf(stderr, "\n");
 }
@@ -421,7 +421,6 @@ static void print_stat(int argc, const char **argv)
 	for (counter = 0; counter < nr_counters; counter++)
 		print_counter(counter);
 
-
 	fprintf(stderr, "\n");
 	fprintf(stderr, " %14.9f  seconds time elapsed",
 			(double)walltime_nsecs_avg/1e9);
-- 
cgit v1.1


From c3043569dc8fbe9228b76174f15d1a7152c48a20 Mon Sep 17 00:00:00 2001
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
Date: Sat, 27 Jun 2009 23:49:09 +0530
Subject: perf stat: Micro-optimize the code: memcpy is only required if no
 event is selected and !null_run

Set attrs and nr_counters if no event is selected and !null_run.

Setting of attrs should depend on number of counters,
so we need to memcpy only for sizeof(default_attrs)

Also set nr_counters as ARRAY_SIZE(default_attrs) in place of
hardcoded value.

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1246126749.32198.16.camel@hpdv5.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3840a70..3e5ea4e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,7 +46,7 @@
 #include <sys/prctl.h>
 #include <math.h>
 
-static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
+static struct perf_counter_attr default_attrs[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK	},
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
@@ -477,16 +477,17 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
 {
 	int status;
 
-	memcpy(attrs, default_attrs, sizeof(attrs));
-
 	argc = parse_options(argc, argv, options, stat_usage, 0);
 	if (!argc)
 		usage_with_options(stat_usage, options);
 	if (run_count <= 0 || run_count > MAX_RUN)
 		usage_with_options(stat_usage, options);
 
-	if (!null_run && !nr_counters)
-		nr_counters = 8;
+	/* Set attrs and nr_counters if no event is selected and !null_run */
+	if (!null_run && !nr_counters) {
+		memcpy(attrs, default_attrs, sizeof(default_attrs));
+		nr_counters = ARRAY_SIZE(default_attrs);
+	}
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	assert(nr_cpus <= MAX_NR_CPUS);
-- 
cgit v1.1


From 210ad39fb7ef0bc0494483f517f42524f16bb2a7 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 29 Jun 2009 21:50:54 +0200
Subject: perf stat: Use percentages for scaling output

Peter expressed a strong preference for percentage based
display of scaled values - so revert to that from the
recently introduced multiplication-factor unit.

Reported-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jaswinder Singh Rajput <jaswinder@kernel.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3e5ea4e..c5a2907 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -307,7 +307,8 @@ static void print_counter(int counter)
 		abs_printout(counter, count, noise);
 
 	if (scaled)
-		fprintf(stderr, "  (%7.2fx scaled)", (double)count[1]/count[2]);
+		fprintf(stderr, "  (scaled from %.2f%%)",
+			(double) count[2] / count[1] * 100);
 
 	fprintf(stderr, "\n");
 }
-- 
cgit v1.1


From 051ae7f7344f453616b6b10332d4d8e1d40ed823 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 29 Jun 2009 21:13:21 +1000
Subject: perf_counter tools: Reduce perf stat measurement overhead/skew

Vince Weaver reported a 'perf stat' measurement overhead in the
count of retired instructions, which can amount to a +6000
instructions inflated count in the reported count.

At present, perf stat creates its counters on the perf process.  Thus
the counters count the fork and various other activity in both the
parent and child, such as the resolver overhead for resolving PLT
entries for any libc functions that haven't been called before, such
as execvp.

This reduces the overhead by creating the counters on the child process
after the fork, using a couple of pipes to synchronize so that the
child process waits until the parent has created the counters before
doing the exec.  To eliminate the PLT resolution overhead on calling
execvp, this does a dummy execvp first which will always fail.

With this, the overhead of executing a program goes down from over
4800 instructions to about 90 instructions on powerpc (32-bit).
This was measured with a statically-linked program written in
assembler which only does the 3 instructions needed to call _exit(0).

Before:

$ perf stat -e 0:1:u ./three

 Performance counter stats for './three':

           4858  instructions

    0.001274523  seconds time elapsed

After:

$ perf stat -e 0:1:u ./three

 Performance counter stats for './three':

             92  instructions

    0.000468153  seconds time elapsed

Reported-by: Vince Weaver <vince@deater.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <19016.41425.814043.870352@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 64 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c5a2907..201ef23 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -99,7 +99,7 @@ static u64			runtime_cycles_noise;
 #define ERR_PERF_OPEN \
 "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
 
-static void create_perf_stat_counter(int counter)
+static void create_perf_stat_counter(int counter, int pid)
 {
 	struct perf_counter_attr *attr = attrs + counter;
 
@@ -119,7 +119,7 @@ static void create_perf_stat_counter(int counter)
 		attr->inherit	= inherit;
 		attr->disabled	= 1;
 
-		fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0);
+		fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0);
 		if (fd[0][counter] < 0 && verbose)
 			fprintf(stderr, ERR_PERF_OPEN, counter,
 				fd[0][counter], strerror(errno));
@@ -205,12 +205,58 @@ static int run_perf_stat(int argc, const char **argv)
 	int status = 0;
 	int counter;
 	int pid;
+	int child_ready_pipe[2], go_pipe[2];
+	char buf;
 
 	if (!system_wide)
 		nr_cpus = 1;
 
+	if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) {
+		perror("failed to create pipes");
+		exit(1);
+	}
+
+	if ((pid = fork()) < 0)
+		perror("failed to fork");
+
+	if (!pid) {
+		close(child_ready_pipe[0]);
+		close(go_pipe[1]);
+		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+		/*
+		 * Do a dummy execvp to get the PLT entry resolved,
+		 * so we avoid the resolver overhead on the real
+		 * execvp call.
+		 */
+		execvp("", (char **)argv);
+
+		/*
+		 * Tell the parent we're ready to go
+		 */
+		close(child_ready_pipe[1]);
+
+		/*
+		 * Wait until the parent tells us to go.
+		 */
+		read(go_pipe[0], &buf, 1);
+
+		execvp(argv[0], (char **)argv);
+
+		perror(argv[0]);
+		exit(-1);
+	}
+
+	/*
+	 * Wait for the child to be ready to exec.
+	 */
+	close(child_ready_pipe[1]);
+	close(go_pipe[0]);
+	read(child_ready_pipe[0], &buf, 1);
+	close(child_ready_pipe[0]);
+
 	for (counter = 0; counter < nr_counters; counter++)
-		create_perf_stat_counter(counter);
+		create_perf_stat_counter(counter, pid);
 
 	/*
 	 * Enable counters and exec the command:
@@ -218,19 +264,9 @@ static int run_perf_stat(int argc, const char **argv)
 	t0 = rdclock();
 	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
 
-	if ((pid = fork()) < 0)
-		perror("failed to fork");
-
-	if (!pid) {
-		if (execvp(argv[0], (char **)argv)) {
-			perror(argv[0]);
-			exit(-1);
-		}
-	}
-
+	close(go_pipe[1]);
 	wait(&status);
 
-	prctl(PR_TASK_PERF_COUNTERS_DISABLE);
 	t1 = rdclock();
 
 	walltime_nsecs[run_idx] = t1 - t0;
-- 
cgit v1.1


From 57e7986ed142417498155ebcd5eaf617ac37136d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 30 Jun 2009 16:07:19 +1000
Subject: perf_counter: Provide a way to enable counters on exec

This provides a way to mark a counter to be enabled on the next
exec. This is useful for measuring the total activity of a
program without including overhead from the process that
launches it.

This also changes the perf stat command to use this new
facility.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <19017.43927.838745.689203@cargo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/builtin-stat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 201ef23..2e03524 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -116,8 +116,9 @@ static void create_perf_stat_counter(int counter, int pid)
 					fd[cpu][counter], strerror(errno));
 		}
 	} else {
-		attr->inherit	= inherit;
-		attr->disabled	= 1;
+		attr->inherit	     = inherit;
+		attr->disabled	     = 1;
+		attr->enable_on_exec = 1;
 
 		fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0);
 		if (fd[0][counter] < 0 && verbose)
@@ -262,7 +263,6 @@ static int run_perf_stat(int argc, const char **argv)
 	 * Enable counters and exec the command:
 	 */
 	t0 = rdclock();
-	prctl(PR_TASK_PERF_COUNTERS_ENABLE);
 
 	close(go_pipe[1]);
 	wait(&status);
-- 
cgit v1.1


From f5812a7a336fb952d819e4427b9a2dce02368e82 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Jun 2009 11:43:17 -0300
Subject: perf_counter tools: Adjust only prelinked symbol's addresses

I.e. we can't handle these two kinds of files in the same way:

1) prelinked system library:

[acme@doppio pahole]$ readelf -s /usr/lib64/libdw-0.141.so | egrep 'FUNC.+GLOBAL.+dwfl_report_elf'
   278: 00000030450105a0   261 FUNC    GLOBAL DEFAULT   12 dwfl_report_elf@@ELFUTILS_0.122

2) not prelinked library with debug information from a -debuginfo package:

[acme@doppio pahole]$ readelf -s /usr/lib/debug/usr/lib64/libdw-0.141.so.debug | egrep 'FUNC.+GLOBAL.+dwfl_report_elf'
   629: 00000000000105a0   261 FUNC    GLOBAL DEFAULT   12 dwfl_report_elf
[acme@doppio pahole]$

Now the numbers I got for a pahole perf run are in line with
the numbers I get from oprofile.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20090630144317.GB12663@ghostprotocols.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/util/symbol.c | 16 +++++++++++-----
 tools/perf/util/symbol.h |  3 ++-
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 9c659ef..78c2efd 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -520,7 +520,9 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 	nr_syms = shdr.sh_size / shdr.sh_entsize;
 
 	memset(&sym, 0, sizeof(sym));
-
+	self->prelinked = elf_section_by_name(elf, &ehdr, &shdr,
+					      ".gnu.prelink_undo",
+					      NULL) != NULL;
 	elf_symtab__for_each_symbol(syms, nr_syms, index, sym) {
 		struct symbol *f;
 		u64 obj_start;
@@ -535,11 +537,13 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
 		gelf_getshdr(sec, &shdr);
 		obj_start = sym.st_value;
 
-		if (verbose >= 2)
-			printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n",
-				(u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset);
+		if (self->prelinked) {
+			if (verbose >= 2)
+				printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n",
+					(u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset);
 
-		sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+		}
 
 		f = symbol__new(sym.st_value, sym.st_size,
 				elf_sym__name(&sym, symstrs),
@@ -573,6 +577,8 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
 	if (!name)
 		return -1;
 
+	self->prelinked = 0;
+
 	if (strncmp(self->name, "/tmp/perf-", 10) == 0)
 		return dso__load_perf_map(self, filter, verbose);
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 940b432..2c48ace 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -20,8 +20,9 @@ struct symbol {
 struct dso {
 	struct list_head node;
 	struct rb_root	 syms;
-	unsigned int	 sym_priv_size;
 	struct symbol    *(*find_symbol)(struct dso *, u64 ip);
+	unsigned int	 sym_priv_size;
+	unsigned char	 prelinked;
 	char		 name[0];
 };
 
-- 
cgit v1.1


From 25903407da21552419e0955705d6d8c8e601cb2e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Jun 2009 19:01:20 -0300
Subject: perf report: Add --dsos parameter

So that we can filter by dso. Symbols in other dsos won't be
accounted for.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1246399282-20934-2-git-send-email-acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-report.txt |   4 +
 tools/perf/Makefile                      |   2 +
 tools/perf/builtin-report.c              |  16 +++
 tools/perf/util/strlist.c                | 184 +++++++++++++++++++++++++++++++
 tools/perf/util/strlist.h                |  32 ++++++
 5 files changed, 238 insertions(+)
 create mode 100644 tools/perf/util/strlist.c
 create mode 100644 tools/perf/util/strlist.h

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 40c1db8..13d85ca 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -20,6 +20,10 @@ OPTIONS
 -i::
 --input=::
         Input file name. (default: perf.data)
+-d::
+--dsos=::
+	Only consider symbols in these dsos. CSV that understands
+	file://filename entries.
 
 SEE ALSO
 --------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1c1296d..9c6d0ae3 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -301,6 +301,7 @@ LIB_H += util/util.h
 LIB_H += util/help.h
 LIB_H += util/strbuf.h
 LIB_H += util/string.h
+LIB_H += util/strlist.h
 LIB_H += util/run-command.h
 LIB_H += util/sigchain.h
 LIB_H += util/symbol.h
@@ -322,6 +323,7 @@ LIB_OBJS += util/run-command.o
 LIB_OBJS += util/quote.o
 LIB_OBJS += util/strbuf.o
 LIB_OBJS += util/string.o
+LIB_OBJS += util/strlist.o
 LIB_OBJS += util/usage.o
 LIB_OBJS += util/wrapper.o
 LIB_OBJS += util/sigchain.o
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ed391db..7c6b6e7 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -16,6 +16,7 @@
 #include "util/symbol.h"
 #include "util/string.h"
 #include "util/callchain.h"
+#include "util/strlist.h"
 
 #include "perf.h"
 #include "util/header.h"
@@ -32,6 +33,8 @@ static char		*vmlinux = NULL;
 
 static char		default_sort_order[] = "comm,dso";
 static char		*sort_order = default_sort_order;
+static char		*dso_list_str;
+static struct strlist	*dso_list;
 
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
@@ -1272,6 +1275,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 	if (show & show_mask) {
 		struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
 
+		if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name))
+			return 0;
+
 		if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
 			eprintf("problem incrementing symbol count, skipping event\n");
 			return -1;
@@ -1659,6 +1665,8 @@ static const struct option options[] = {
 	OPT_BOOLEAN('x', "exclude-other", &exclude_other,
 		    "Only display entries with parent-match"),
 	OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"),
+	OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]",
+		   "only consider symbols in these dsos"),
 	OPT_END()
 };
 
@@ -1698,6 +1706,14 @@ int cmd_report(int argc, const char **argv, const char *prefix)
 	if (argc)
 		usage_with_options(report_usage, options);
 
+	if (dso_list_str) {
+		dso_list = strlist__new(true, dso_list_str);
+		if (!dso_list) {
+			fprintf(stderr, "problems parsing dso list\n");
+			exit(129);
+		}
+	}
+
 	setup_pager();
 
 	return __cmd_report();
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
new file mode 100644
index 0000000..025a78e
--- /dev/null
+++ b/tools/perf/util/strlist.c
@@ -0,0 +1,184 @@
+/*
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Licensed under the GPLv2.
+ */
+
+#include "strlist.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static struct str_node *str_node__new(const char *s, bool dupstr)
+{
+	struct str_node *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		if (dupstr) {
+			s = strdup(s);
+			if (s == NULL)
+				goto out_delete;
+		}
+		self->s = s;
+	}
+
+	return self;
+
+out_delete:
+	free(self);
+	return NULL;
+}
+
+static void str_node__delete(struct str_node *self, bool dupstr)
+{
+	if (dupstr)
+		free((void *)self->s);
+	free(self);
+}
+
+int strlist__add(struct strlist *self, const char *new_entry)
+{
+	struct rb_node **p = &self->entries.rb_node;
+	struct rb_node *parent = NULL;
+	struct str_node *sn;
+
+	while (*p != NULL) {
+		int rc;
+
+		parent = *p;
+		sn = rb_entry(parent, struct str_node, rb_node);
+		rc = strcmp(sn->s, new_entry);
+
+		if (rc > 0)
+			p = &(*p)->rb_left;
+		else if (rc < 0)
+			p = &(*p)->rb_right;
+		else
+			return -EEXIST;
+	}
+
+	sn = str_node__new(new_entry, self->dupstr);
+	if (sn == NULL)
+		return -ENOMEM;
+
+	rb_link_node(&sn->rb_node, parent, p);
+	rb_insert_color(&sn->rb_node, &self->entries);
+
+	return 0;
+}
+
+int strlist__load(struct strlist *self, const char *filename)
+{
+	char entry[1024];
+	int err;
+	FILE *fp = fopen(filename, "r");
+
+	if (fp == NULL)
+		return errno;
+
+	while (fgets(entry, sizeof(entry), fp) != NULL) {
+		const size_t len = strlen(entry);
+
+		if (len == 0)
+			continue;
+		entry[len - 1] = '\0';
+
+		err = strlist__add(self, entry);
+		if (err != 0)
+			goto out;
+	}
+
+	err = 0;
+out:
+	fclose(fp);
+	return err;
+}
+
+void strlist__remove(struct strlist *self, struct str_node *sn)
+{
+	rb_erase(&sn->rb_node, &self->entries);
+	str_node__delete(sn, self->dupstr);
+}
+
+bool strlist__has_entry(struct strlist *self, const char *entry)
+{
+	struct rb_node **p = &self->entries.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*p != NULL) {
+		struct str_node *sn;
+		int rc;
+
+		parent = *p;
+		sn = rb_entry(parent, struct str_node, rb_node);
+		rc = strcmp(sn->s, entry);
+
+		if (rc > 0)
+			p = &(*p)->rb_left;
+		else if (rc < 0)
+			p = &(*p)->rb_right;
+		else
+			return true;
+	}
+
+	return false;
+}
+
+static int strlist__parse_list_entry(struct strlist *self, const char *s)
+{
+	if (strncmp(s, "file://", 7) == 0)
+		return strlist__load(self, s + 7);
+
+	return strlist__add(self, s);
+}
+
+int strlist__parse_list(struct strlist *self, const char *s)
+{
+	char *sep;
+	int err;
+
+	while ((sep = strchr(s, ',')) != NULL) {
+		*sep = '\0';
+		err = strlist__parse_list_entry(self, s);
+		*sep = ',';
+		if (err != 0)
+			return err;
+		s = sep + 1;
+	}
+
+	return *s ? strlist__parse_list_entry(self, s) : 0;
+}
+
+struct strlist *strlist__new(bool dupstr, const char *slist)
+{
+	struct strlist *self = malloc(sizeof(*self));
+
+	if (self != NULL) {
+		self->entries = RB_ROOT;
+		self->dupstr = dupstr;
+		if (slist && strlist__parse_list(self, slist) != 0)
+			goto out_error;
+	}
+
+	return self;
+out_error:
+	free(self);
+	return NULL;
+}
+
+void strlist__delete(struct strlist *self)
+{
+	if (self != NULL) {
+		struct str_node *pos;
+		struct rb_node *next = rb_first(&self->entries);
+
+		while (next) {
+			pos = rb_entry(next, struct str_node, rb_node);
+			next = rb_next(&pos->rb_node);
+			strlist__remove(self, pos);
+		}
+		self->entries = RB_ROOT;
+		free(self);
+	}
+}
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
new file mode 100644
index 0000000..2fb117f
--- /dev/null
+++ b/tools/perf/util/strlist.h
@@ -0,0 +1,32 @@
+#ifndef STRLIST_H_
+#define STRLIST_H_
+
+#include "rbtree.h"
+#include <stdbool.h>
+
+struct str_node {
+	struct rb_node rb_node;
+	const char     *s;
+};
+
+struct strlist {
+	struct rb_root entries;
+	bool dupstr;
+};
+
+struct strlist *strlist__new(bool dupstr, const char *slist);
+void strlist__delete(struct strlist *self);
+
+void strlist__remove(struct strlist *self, struct str_node *sn);
+int strlist__load(struct strlist *self, const char *filename);
+int strlist__add(struct strlist *self, const char *str);
+
+bool strlist__has_entry(struct strlist *self, const char *entry);
+
+static inline bool strlist__empty(const struct strlist *self)
+{
+	return rb_first(&self->entries) == NULL;
+}
+
+int strlist__parse_list(struct strlist *self, const char *s);
+#endif /* STRLIST_H_ */
-- 
cgit v1.1


From cc8b88b15ab8e5ae162a46c4b6b286b555190dd1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Jun 2009 19:01:21 -0300
Subject: perf report: Add --comms parameter

So that we can filter by comm. Symbols in other comms won't be
accounted for.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1246399282-20934-3-git-send-email-acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-report.txt |  4 ++++
 tools/perf/builtin-report.c              | 33 ++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 13d85ca..4c44ef1 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -24,6 +24,10 @@ OPTIONS
 --dsos=::
 	Only consider symbols in these dsos. CSV that understands
 	file://filename entries.
+-C::
+--comms=::
+	Only consider symbols in these comms. CSV that understands
+	file://filename entries.
 
 SEE ALSO
 --------
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 7c6b6e7..8143477 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -33,8 +33,8 @@ static char		*vmlinux = NULL;
 
 static char		default_sort_order[] = "comm,dso";
 static char		*sort_order = default_sort_order;
-static char		*dso_list_str;
-static struct strlist	*dso_list;
+static char		*dso_list_str, *comm_list_str;
+static struct strlist	*dso_list, *comm_list;
 
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
@@ -240,7 +240,7 @@ static u64 vdso__map_ip(struct map *map, u64 ip)
 
 static inline int is_anon_memory(const char *filename)
 {
-     return strcmp(filename, "//anon") == 0;
+	return strcmp(filename, "//anon") == 0;
 }
 
 static struct map *map__new(struct mmap_event *event)
@@ -1253,6 +1253,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		return -1;
 	}
 
+	if (comm_list && !strlist__has_entry(comm_list, thread->comm))
+		return 0;
+
 	if (event->header.misc & PERF_EVENT_MISC_KERNEL) {
 		show = SHOW_KERNEL;
 		level = 'k';
@@ -1667,6 +1670,8 @@ static const struct option options[] = {
 	OPT_BOOLEAN('c', "callchain", &callchain, "Display callchains"),
 	OPT_STRING('d', "dsos", &dso_list_str, "dso[,dso...]",
 		   "only consider symbols in these dsos"),
+	OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]",
+		   "only consider symbols in these comms"),
 	OPT_END()
 };
 
@@ -1685,6 +1690,19 @@ static void setup_sorting(void)
 	free(str);
 }
 
+static void setup_list(struct strlist **list, const char *list_str,
+		       const char *list_name)
+{
+	if (list_str) {
+		*list = strlist__new(true, list_str);
+		if (!*list) {
+			fprintf(stderr, "problems parsing %s list\n",
+				list_name);
+			exit(129);
+		}
+	}
+}
+
 int cmd_report(int argc, const char **argv, const char *prefix)
 {
 	symbol__init();
@@ -1706,13 +1724,8 @@ int cmd_report(int argc, const char **argv, const char *prefix)
 	if (argc)
 		usage_with_options(report_usage, options);
 
-	if (dso_list_str) {
-		dso_list = strlist__new(true, dso_list_str);
-		if (!dso_list) {
-			fprintf(stderr, "problems parsing dso list\n");
-			exit(129);
-		}
-	}
+	setup_list(&dso_list, dso_list_str, "dso");
+	setup_list(&comm_list, comm_list_str, "comm");
 
 	setup_pager();
 
-- 
cgit v1.1


From 7bec7a9134c25cecb0d7029199b59f7b1bef35b8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Jun 2009 19:01:22 -0300
Subject: perf report: Add --symbols parameter

So that we can filter by symbol name.

The 'pfunct' utility in the 'dwarves' package can be used to
create a file with the functions one wants.

Example:

[acme@doppio pahole]$ pfunct /usr/lib/debug/usr/lib64/libdw-0.141.so.debug | grep dwarf > /tmp/dwarf.symbols
[acme@doppio pahole]$ wc -l /tmp/dwarf.symbols
93 /tmp/dwarf.symbols
[acme@doppio pahole]$ head -3 /tmp/dwarf.symbols
dwfl_addrdwarf
dwfl_module_getdwarf
dwfl_getdwarf
[acme@doppio pahole]$ perf report --sort comm,dso,symbol --comms pahole --dsos /usr/lib64/libdw-0.141.so --symbols file:///tmp/dwarf.symbols

    33.99%            pahole  /usr/lib64/libdw-0.141.so  [.] dwarf_tag
    29.07%            pahole  /usr/lib64/libdw-0.141.so  [.] dwarf_decl_file
    27.71%            pahole  /usr/lib64/libdw-0.141.so  [.] dwarf_getsrclines
     4.54%            pahole  /usr/lib64/libdw-0.141.so  0x00000000007400
     3.93%            pahole  /usr/lib64/libdw-0.141.so  [.] dwarf_decl_line
     0.46%            pahole  /usr/lib64/libdw-0.141.so  [.] dwarf_getlocation
     0.18%            pahole  /usr/lib64/libdw-0.141.so  [.] __libdwarf_next_prime
     0.13%            pahole  /usr/lib64/libdw-0.141.so  [.] dwarf_diecu

[acme@doppio pahole]$

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Mackerras <paulus@samba.org>
LKML-Reference: <1246399282-20934-4-git-send-email-acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 tools/perf/Documentation/perf-report.txt |  4 ++++
 tools/perf/builtin-report.c              | 10 ++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 4c44ef1..8aa3f8c 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -28,6 +28,10 @@ OPTIONS
 --comms=::
 	Only consider symbols in these comms. CSV that understands
 	file://filename entries.
+-S::
+--symbols=::
+	Only consider these symbols. CSV that understands
+	file://filename entries.
 
 SEE ALSO
 --------
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8143477..135b783 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -33,8 +33,8 @@ static char		*vmlinux = NULL;
 
 static char		default_sort_order[] = "comm,dso";
 static char		*sort_order = default_sort_order;
-static char		*dso_list_str, *comm_list_str;
-static struct strlist	*dso_list, *comm_list;
+static char		*dso_list_str, *comm_list_str, *sym_list_str;
+static struct strlist	*dso_list, *comm_list, *sym_list;
 
 static int		input;
 static int		show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
@@ -1281,6 +1281,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name))
 			return 0;
 
+		if (sym_list && sym && !strlist__has_entry(sym_list, sym->name))
+			return 0;
+
 		if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
 			eprintf("problem incrementing symbol count, skipping event\n");
 			return -1;
@@ -1672,6 +1675,8 @@ static const struct option options[] = {
 		   "only consider symbols in these dsos"),
 	OPT_STRING('C', "comms", &comm_list_str, "comm[,comm...]",
 		   "only consider symbols in these comms"),
+	OPT_STRING('S', "symbols", &sym_list_str, "symbol[,symbol...]",
+		   "only consider these symbols"),
 	OPT_END()
 };
 
@@ -1726,6 +1731,7 @@ int cmd_report(int argc, const char **argv, const char *prefix)
 
 	setup_list(&dso_list, dso_list_str, "dso");
 	setup_list(&comm_list, comm_list_str, "comm");
+	setup_list(&sym_list, sym_list_str, "symbol");
 
 	setup_pager();
 
-- 
cgit v1.1