summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_program.c
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2012-11-27 14:10:52 -0800
committerEric Anholt <eric@anholt.net>2012-12-05 14:29:44 -0800
commit71f06344a0d72a6bd27750ceca571fc016b8de85 (patch)
tree4a32ebc3e5bff0ad16665a5a0737b2da1c0e0683 /src/mesa/drivers/dri/i965/brw_program.c
parentef2fbf67d4bd941a9a0e1c6f8515fb4911e05c50 (diff)
downloadexternal_mesa3d-71f06344a0d72a6bd27750ceca571fc016b8de85.zip
external_mesa3d-71f06344a0d72a6bd27750ceca571fc016b8de85.tar.gz
external_mesa3d-71f06344a0d72a6bd27750ceca571fc016b8de85.tar.bz2
i965: Add a debug flag for counting cycles spent in each compiled shader.
This can be used for two purposes: Using hand-coded shaders to determine per-instruction timings, or figuring out which shader to optimize in a whole application. Note that this doesn't cover the instructions that set up the message to the URB/FB write -- we'd need to convert the MRF usage in these instructions to GRFs so that our offsets/times don't overwrite our shader outputs. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (v1) v2: Check the timestamp reset flag in the VS, which is apparently getting set fairly regularly in the range we watch, resulting in negative numbers getting added to our 32-bit counter, and thus large values added to our uint64_t. v3: Rebase on reladdr changes, removing a new safety check that proved impossible to satisfy. Add a comment to the AOP defs from Ken's review, and put them in a slightly more sensible spot. v4: Check timestamp reset in the FS as well.
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_program.c')
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c127
1 files changed, 127 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 6bf5a6a..1859041 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -189,3 +189,130 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
functions->LinkShader = brw_link_shader;
}
+/**
+ * Allocates the state used for per-shader cycle counting: a buffer object
+ * holding one 32-bit counter per entry, plus CPU-side arrays recording the
+ * owning program, the shader type and the 64-bit cumulative total for each
+ * entry.
+ *
+ * If any allocation fails, everything is torn down again and
+ * brw->shader_time.bo is left NULL, which the collection and reporting
+ * functions in this file treat as "shader time disabled".
+ * NOTE(review): code outside this file that indexes these arrays should
+ * also be checked against the bo == NULL / max_entries == 0 case.
+ */
+void
+brw_init_shader_time(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   const int max_entries = 4096;
+
+   /* The BO is read back as an array of uint32_t counters. */
+   brw->shader_time.bo = drm_intel_bo_alloc(intel->bufmgr, "shader time",
+                                            max_entries * sizeof(uint32_t),
+                                            4096);
+   brw->shader_time.programs = rzalloc_array(brw, struct gl_shader_program *,
+                                             max_entries);
+   brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
+                                          max_entries);
+   brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
+                                               max_entries);
+
+   if (!brw->shader_time.bo ||
+       !brw->shader_time.programs ||
+       !brw->shader_time.types ||
+       !brw->shader_time.cumulative) {
+      /* Both release functions accept NULL, so a partial failure needs no
+       * special casing here.
+       */
+      drm_intel_bo_unreference(brw->shader_time.bo);
+      brw->shader_time.bo = NULL;
+
+      ralloc_free(brw->shader_time.programs);
+      brw->shader_time.programs = NULL;
+      ralloc_free(brw->shader_time.types);
+      brw->shader_time.types = NULL;
+      ralloc_free(brw->shader_time.cumulative);
+      brw->shader_time.cumulative = NULL;
+
+      brw->shader_time.max_entries = 0;
+      return;
+   }
+
+   brw->shader_time.max_entries = max_entries;
+}
+
+/**
+ * qsort() comparator for an array of pointers to uint64_t.
+ *
+ * Orders the pointers by the values they point at, ascending, returning
+ * -1, 0 or 1.
+ */
+static int
+compare_time(const void *a, const void *b)
+{
+   const uint64_t lhs = **(uint64_t * const *)a;
+   const uint64_t rhs = **(uint64_t * const *)b;
+
+   /* Compare instead of subtracting: a 64-bit difference would not survive
+    * the conversion to int.
+    */
+   return (lhs > rhs) - (lhs < rhs);
+}
+
+/**
+ * Prints one line per shader-time entry to stdout, sorted by ascending
+ * cumulative cycle count, giving the raw count, the count in Gcycles and
+ * the share of the total across all entries.
+ *
+ * Does nothing when there is no shader-time BO or no entries, and prints a
+ * short notice instead of the table when every counter is still zero.
+ */
+static void
+brw_report_shader_time(struct brw_context *brw)
+{
+   if (!brw->shader_time.bo || !brw->shader_time.num_entries)
+      return;
+
+   /* Sort pointers into cumulative[] rather than the values themselves, so
+    * that each sorted element can be mapped back to its entry index.
+    */
+   uint64_t *sorted[brw->shader_time.num_entries];
+   double total = 0;
+   for (int i = 0; i < brw->shader_time.num_entries; i++) {
+      sorted[i] = &brw->shader_time.cumulative[i];
+      total += brw->shader_time.cumulative[i];
+   }
+
+   if (total == 0) {
+      printf("No shader time collected yet\n");
+      return;
+   }
+
+   qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
+
+   printf("\n");
+   printf("type ID cycles spent %% of total\n");
+   for (int s = 0; s < brw->shader_time.num_entries; s++) {
+      /* Recover the entry index from the sorted pointer's offset into
+       * cumulative[], so the matching program and type can be looked up.
+       */
+      int i = sorted[s] - brw->shader_time.cumulative;
+
+      /* Entries with no program recorded are reported with ID -1. */
+      int shader_num = -1;
+      if (brw->shader_time.programs[i]) {
+         shader_num = brw->shader_time.programs[i]->Name;
+      }
+
+      switch (brw->shader_time.types[i]) {
+      case ST_VS:
+         printf("vs %4d: ", shader_num);
+         break;
+      case ST_FS8:
+         printf("fs8 %4d: ", shader_num);
+         break;
+      case ST_FS16:
+         printf("fs16 %4d: ", shader_num);
+         break;
+      default:
+         printf("other: ");
+         break;
+      }
+
+      /* The counters are unsigned 64-bit values, so print them with an
+       * unsigned conversion instead of reinterpreting them as signed.
+       */
+      printf("%16llu (%7.2f Gcycles) %4.1f%%\n",
+             (unsigned long long)brw->shader_time.cumulative[i],
+             (double)brw->shader_time.cumulative[i] / 1000000000.0,
+             (double)brw->shader_time.cumulative[i] / total * 100.0);
+   }
+}
+
+/**
+ * Adds the 32-bit per-entry counters currently held in the shader-time BO
+ * to the 64-bit cumulative totals, then zeroes the BO so that the next
+ * collection starts from scratch.
+ *
+ * Does nothing when there is no shader-time BO, or when the BO cannot be
+ * mapped.
+ */
+static void
+brw_collect_shader_time(struct brw_context *brw)
+{
+   if (!brw->shader_time.bo)
+      return;
+
+   /* This probably stalls on the last rendering. We could fix that by
+    * delaying reading the reports, but it doesn't look like it's a big
+    * overhead compared to the cost of tracking the time in the first place.
+    *
+    * If the mapping fails there is no valid CPU pointer to read from or
+    * clear, so skip this round instead of touching bo->virtual.
+    */
+   if (drm_intel_bo_map(brw->shader_time.bo, true) != 0)
+      return;
+
+   uint32_t *times = brw->shader_time.bo->virtual;
+
+   for (int i = 0; i < brw->shader_time.num_entries; i++) {
+      brw->shader_time.cumulative[i] += times[i];
+   }
+
+   /* Zero the BO out to clear it out for our next collection.
+    */
+   memset(times, 0, brw->shader_time.bo->size);
+   drm_intel_bo_unmap(brw->shader_time.bo);
+}
+
+/**
+ * Folds the latest counters into the cumulative totals, and prints the
+ * report if none has been printed yet or if at least 1.0 (in get_time()
+ * units) has passed since the previous one.
+ */
+void
+brw_collect_and_report_shader_time(struct brw_context *brw)
+{
+   brw_collect_shader_time(brw);
+
+   /* A previous report exists and not enough time has passed since it:
+    * nothing to print this time around.
+    */
+   if (brw->shader_time.report_time != 0 &&
+       !(get_time() - brw->shader_time.report_time >= 1.0))
+      return;
+
+   brw_report_shader_time(brw);
+
+   /* Stamp the time after printing, so the interval is measured from the
+    * end of this report.
+    */
+   brw->shader_time.report_time = get_time();
+}
+
+/**
+ * Drops the context's reference on the shader-time BO and clears the
+ * pointer, which makes the collection and reporting functions in this file
+ * return early.
+ */
+void
+brw_destroy_shader_time(struct brw_context *brw)
+{
+   drm_intel_bo *bo = brw->shader_time.bo;
+
+   /* Detach the BO from the context first, then release it. */
+   brw->shader_time.bo = NULL;
+   drm_intel_bo_unreference(bo);
+}