1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include "trace_reader.h"
#include "armdis.h"
struct MyStaticRec {
StaticRec bb;
uint32_t *insns;
uint32_t *cycles; // number of cycles for each insn
uint32_t elapsed; // number of cycles for basic block
int freq; // execution frequency
MyStaticRec *inner; // pointer to an inner basic block
int is_thumb;
};
MyStaticRec **assign_inner_blocks(int num_blocks, MyStaticRec *blocks);
// This function is called from quicksort to compare addresses of basic
// blocks.
int cmp_inc_addr(const void *a, const void *b) {
MyStaticRec *bb1, *bb2;
bb1 = *(MyStaticRec**)a;
bb2 = *(MyStaticRec**)b;
if (bb1->bb.bb_addr < bb2->bb.bb_addr)
return -1;
if (bb1->bb.bb_addr > bb2->bb.bb_addr)
return 1;
return bb1->bb.bb_num - bb2->bb.bb_num;
}
// This function is called from quicksort to compare the elapsed time
// of basic blocks.
int cmp_dec_elapsed(const void *a, const void *b) {
MyStaticRec *bb1, *bb2;
bb1 = *(MyStaticRec**)a;
bb2 = *(MyStaticRec**)b;
if (bb1->elapsed < bb2->elapsed)
return 1;
if (bb1->elapsed > bb2->elapsed)
return -1;
return bb1->bb.bb_num - bb2->bb.bb_num;
}
// This function is called from quicksort to compare frequencies of
// basic blocks.
int cmp_dec_freq(const void *a, const void *b) {
MyStaticRec *bb1, *bb2;
bb1 = *(MyStaticRec**)a;
bb2 = *(MyStaticRec**)b;
if (bb1->freq < bb2->freq)
return 1;
if (bb1->freq > bb2->freq)
return -1;
return bb1->bb.bb_num - bb2->bb.bb_num;
}
int main(int argc, char **argv)
{
if (argc != 2) {
fprintf(stderr, "Usage: %s trace_file\n", argv[0]);
exit(1);
}
char *trace_filename = argv[1];
TraceReaderBase *trace = new TraceReaderBase;
trace->Open(trace_filename);
TraceHeader *header = trace->GetHeader();
uint32_t num_static_bb = header->num_static_bb;
// Allocate space for all of the static blocks
MyStaticRec *blocks = new MyStaticRec[num_static_bb];
// Read in all the static blocks
for (uint32_t ii = 0; ii < num_static_bb; ++ii) {
trace->ReadStatic(&blocks[ii].bb);
blocks[ii].is_thumb = blocks[ii].bb.bb_addr & 1;
blocks[ii].bb.bb_addr &= ~1;
uint32_t num_insns = blocks[ii].bb.num_insns;
blocks[ii].insns = new uint32_t[num_insns];
blocks[ii].cycles = new uint32_t[num_insns];
memset(blocks[ii].cycles, 0, num_insns * sizeof(uint32_t));
trace->ReadStaticInsns(num_insns, blocks[ii].insns);
blocks[ii].elapsed = 0;
blocks[ii].freq = 0;
blocks[ii].inner = NULL;
}
MyStaticRec **sorted = assign_inner_blocks(num_static_bb, blocks);
uint32_t prev_time = 0;
uint32_t elapsed = 0;
uint32_t dummy;
uint32_t *cycle_ptr = &dummy;
uint32_t *bb_elapsed_ptr = &dummy;
while (1) {
BBEvent event;
if (trace->ReadBB(&event))
break;
// Assign frequencies to each basic block
uint64_t bb_num = event.bb_num;
int num_insns = event.num_insns;
blocks[bb_num].freq += 1;
for (MyStaticRec *bptr = blocks[bb_num].inner; bptr; bptr = bptr->inner)
bptr->freq += 1;
// Assign simulation time to each instruction
for (MyStaticRec *bptr = &blocks[bb_num]; bptr; bptr = bptr->inner) {
uint32_t bb_num_insns = bptr->bb.num_insns;
for (uint32_t ii = 0; num_insns && ii < bb_num_insns; ++ii, --num_insns) {
uint32_t sim_time = trace->ReadInsnTime(event.time);
elapsed = sim_time - prev_time;
prev_time = sim_time;
// Attribute the elapsed time to the previous instruction and
// basic block.
*cycle_ptr += elapsed;
*bb_elapsed_ptr += elapsed;
cycle_ptr = &bptr->cycles[ii];
bb_elapsed_ptr = &bptr->elapsed;
}
}
}
*cycle_ptr += 1;
*bb_elapsed_ptr += 1;
// Sort the basic blocks into decreasing elapsed time
qsort(sorted, num_static_bb, sizeof(MyStaticRec*), cmp_dec_elapsed);
char spaces[80];
memset(spaces, ' ', 79);
spaces[79] = 0;
for (uint32_t ii = 0; ii < num_static_bb; ++ii) {
printf("bb %lld addr: 0x%x, insns: %d freq: %u elapsed: %u\n",
sorted[ii]->bb.bb_num, sorted[ii]->bb.bb_addr,
sorted[ii]->bb.num_insns, sorted[ii]->freq,
sorted[ii]->elapsed);
int num_insns = sorted[ii]->bb.num_insns;
uint32_t addr = sorted[ii]->bb.bb_addr;
for (int jj = 0; jj < num_insns; ++jj) {
uint32_t elapsed = sorted[ii]->cycles[jj];
uint32_t insn = sorted[ii]->insns[jj];
if (insn_is_thumb(insn)) {
insn = insn_unwrap_thumb(insn);
// thumb_pair is true if this is the first of a pair of
// thumb instructions (BL or BLX).
bool thumb_pair = ((insn & 0xf800) == 0xf000);
// Get the next thumb instruction (if any) because we may need
// it for the case where insn is BL or BLX.
uint32_t insn2 = 0;
if (thumb_pair && (jj + 1 < num_insns)) {
insn2 = sorted[ii]->insns[jj + 1];
insn2 = insn_unwrap_thumb(insn2);
jj += 1;
}
char *disasm = disasm_insn_thumb(addr, insn, insn2, NULL);
if (thumb_pair) {
printf(" %4u %08x %04x %04x %s\n", elapsed, addr, insn,
insn2, disasm);
addr += 2;
} else {
printf(" %4u %08x %04x %s\n", elapsed, addr, insn,
disasm);
}
addr += 2;
} else {
char *disasm = Arm::disasm(addr, insn, NULL);
printf(" %4u %08x %08x %s\n", elapsed, addr, insn, disasm);
addr += 4;
}
}
}
delete[] sorted;
return 0;
}
// Find the basic blocks that are subsets of other basic blocks.
MyStaticRec **assign_inner_blocks(int num_blocks, MyStaticRec *blocks)
{
int ii;
uint32_t addr_end, addr_diff;
// Create a list of pointers to the basic blocks that we can sort.
MyStaticRec **sorted = new MyStaticRec*[num_blocks];
for (ii = 0; ii < num_blocks; ++ii) {
sorted[ii] = &blocks[ii];
}
// Sort the basic blocks into increasing address order
qsort(sorted, num_blocks, sizeof(MyStaticRec*), cmp_inc_addr);
// Create pointers to inner blocks and break up the enclosing block
// so that there is no overlap.
for (ii = 0; ii < num_blocks - 1; ++ii) {
int num_bytes;
if (sorted[ii]->is_thumb)
num_bytes = sorted[ii]->bb.num_insns << 1;
else
num_bytes = sorted[ii]->bb.num_insns << 2;
addr_end = sorted[ii]->bb.bb_addr + num_bytes;
if (addr_end > sorted[ii + 1]->bb.bb_addr) {
sorted[ii]->inner = sorted[ii + 1];
addr_diff = sorted[ii + 1]->bb.bb_addr - sorted[ii]->bb.bb_addr;
uint32_t num_insns;
if (sorted[ii]->is_thumb)
num_insns = addr_diff >> 1;
else
num_insns = addr_diff >> 2;
sorted[ii]->bb.num_insns = num_insns;
}
}
return sorted;
}
|