diff options
author | Marek Olšák <marek.olsak@amd.com> | 2015-08-19 11:53:25 +0200 |
---|---|---|
committer | Marek Olšák <marek.olsak@amd.com> | 2015-08-26 19:25:19 +0200 |
commit | 2c14a6d3b1c53d5814414ce9e91fd8d24c90b787 (patch) | |
tree | 86e88808c7bed71d6a1716fbaaf46dec6d7539e1 | |
parent | 189953ee13ad7d6b5d9d04ac21a230e8137a700d (diff) | |
download | external_mesa3d-2c14a6d3b1c53d5814414ce9e91fd8d24c90b787.zip external_mesa3d-2c14a6d3b1c53d5814414ce9e91fd8d24c90b787.tar.gz external_mesa3d-2c14a6d3b1c53d5814414ce9e91fd8d24c90b787.tar.bz2 |
radeonsi: add IB tracing support for debug contexts
This adds trace points to all IBs and the parser prints them and also
prints which trace points were reached (executed) by the CP.
This can help pinpoint a problematic packet, draw call, etc.
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | src/gallium/drivers/radeonsi/si_debug.c | 67 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_hw_context.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_pipe.h | 9 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_state_draw.c | 19 |
5 files changed, 105 insertions, 16 deletions
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 72b7989..cf09686 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -142,7 +142,8 @@ static void si_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count, si_dump_reg(f, reg + i*4, ib[2+i], ~0); } -static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) +static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw, + int trace_id) { unsigned count = PKT_COUNT_G(ib[0]); unsigned op = PKT3_IT_OPCODE_G(ib[0]); @@ -232,6 +233,36 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) if (ib[0] == 0xffff1000) { count = -1; /* One dword NOP. */ break; + } else if (count == 0 && SI_IS_TRACE_POINT(ib[1])) { + unsigned packet_id = SI_GET_TRACE_POINT_ID(ib[1]); + + print_spaces(f, INDENT_PKT); + fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id); + + if (trace_id == -1) + break; /* tracing was disabled */ + + print_spaces(f, INDENT_PKT); + if (packet_id < trace_id) + fprintf(f, COLOR_RED + "This trace point was reached by the CP." + COLOR_RESET "\n"); + else if (packet_id == trace_id) + fprintf(f, COLOR_RED + "!!!!! This is the last trace point that " + "was reached by the CP !!!!!" + COLOR_RESET "\n"); + else if (packet_id == trace_id + 1) + fprintf(f, COLOR_RED + "!!!!! This is the first trace point that " + "has NOT been reached by the CP !!!!!" + COLOR_RESET "\n"); + else + fprintf(f, COLOR_RED + "!!!!! This trace point was NOT reached " + "by the CP !!!!!" + COLOR_RESET "\n"); + break; } /* fall through, print all dwords */ default: @@ -246,7 +277,17 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw) return ib; } -static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) +/** + * Parse and print an IB into a file. 
+ * + * \param f file + * \param ib IB + * \param num_dw size of the IB + * \param chip_class chip class + * \param trace_id the last trace ID that is known to have been reached + * and executed by the CP, typically read from a buffer + */ +static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id) { fprintf(f, "------------------ IB begin ------------------\n"); @@ -255,7 +296,7 @@ static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw) switch (type) { case 3: - ib = si_parse_packet3(f, ib, &num_dw); + ib = si_parse_packet3(f, ib, &num_dw, trace_id); break; case 2: /* type-2 nop */ @@ -342,9 +383,27 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, si_dump_shader(sctx->ps_shader, "Fragment", f); if (sctx->last_ib) { - si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size); + int last_trace_id = -1; + + if (sctx->last_trace_buf) { + /* We are expecting that the ddebug pipe has already + * waited for the context, so this buffer should be idle. + * If the GPU is hung, there is no point in waiting for it. + */ + uint32_t *map = + sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf, + NULL, + PIPE_TRANSFER_UNSYNCHRONIZED | + PIPE_TRANSFER_READ); + if (map) + last_trace_id = *map; + } + + si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size, + last_trace_id); free(sctx->last_ib); /* dump only once */ sctx->last_ib = NULL; + r600_resource_reference(&sctx->last_trace_buf, NULL); } fprintf(f, "Done.\n"); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index b726eb3..110e316 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -89,7 +89,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw, num_dw += ctx->atoms.s.cache_flush->num_dw; if (ctx->screen->b.trace_bo) - num_dw += SI_TRACE_CS_DWORDS; + num_dw += SI_TRACE_CS_DWORDS * 2; /* Flush if there's not enough space. 
*/ if (num_dw > cs->max_dw) { @@ -127,12 +127,17 @@ void si_context_gfx_flush(void *context, unsigned flags, /* force to keep tiling flags */ flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; + if (ctx->trace_buf) + si_trace_emit(ctx); + /* Save the IB for debug contexts. */ if (ctx->is_debug) { free(ctx->last_ib); ctx->last_ib_dw_size = cs->cdw; ctx->last_ib = malloc(cs->cdw * 4); memcpy(ctx->last_ib, cs->buf, cs->cdw * 4); + r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf); + r600_resource_reference(&ctx->trace_buf, NULL); } /* Flush the CS. */ @@ -148,6 +153,23 @@ void si_context_gfx_flush(void *context, unsigned flags, void si_begin_new_cs(struct si_context *ctx) { + if (ctx->is_debug) { + uint32_t zero = 0; + + /* Create a buffer used for writing trace IDs and initialize it to 0. */ + assert(!ctx->trace_buf); + ctx->trace_buf = (struct r600_resource*) + pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_STAGING, 4); + if (ctx->trace_buf) + pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b, + 0, sizeof(zero), &zero); + ctx->trace_id = 0; + } + + if (ctx->trace_buf) + si_trace_emit(ctx); + /* Flush read caches at the beginning of CS. 
*/ ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | SI_CONTEXT_INV_TC_L1 | diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index e5900b7..92c6ae3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -81,6 +81,8 @@ static void si_destroy_context(struct pipe_context *context) LLVMDisposeTargetMachine(sctx->tm); #endif + r600_resource_reference(&sctx->trace_buf, NULL); + r600_resource_reference(&sctx->last_trace_buf, NULL); free(sctx->last_ib); FREE(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 09a21ce..52167f2 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -43,7 +43,7 @@ #define SI_RESTART_INDEX_UNKNOWN INT_MIN #define SI_NUM_SMOOTH_AA_SAMPLES 8 -#define SI_TRACE_CS_DWORDS 6 +#define SI_TRACE_CS_DWORDS 7 #define SI_MAX_DRAW_CS_DWORDS \ (/*scratch:*/ 3 + /*derived prim state:*/ 3 + \ @@ -81,6 +81,10 @@ SI_CONTEXT_FLUSH_AND_INV_DB | \ SI_CONTEXT_FLUSH_AND_INV_DB_META) +#define SI_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff)) +#define SI_IS_TRACE_POINT(x) (((x) & 0xffff0000) == 0xcafe0000) +#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff) + struct si_compute; struct si_screen { @@ -247,6 +251,9 @@ struct si_context { bool is_debug; uint32_t *last_ib; unsigned last_ib_dw_size; + struct r600_resource *last_trace_buf; + struct r600_resource *trace_buf; + unsigned trace_id; }; /* cik_sdma.c */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e56c9e7..b1aba12 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -835,7 +835,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_emit_draw_registers(sctx, info); si_emit_draw_packets(sctx, info, &ib); - if (sctx->screen->b.trace_bo) + if (sctx->trace_buf) 
si_trace_emit(sctx); /* Workaround for a VGT hang when streamout is enabled. @@ -873,19 +873,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) void si_trace_emit(struct si_context *sctx) { - struct si_screen *sscreen = sctx->screen; struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint64_t va; - va = sscreen->b.trace_bo->gpu_address; - r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->b.trace_bo, + sctx->trace_id++; + r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf, RADEON_USAGE_READWRITE, RADEON_PRIO_MIN); - radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0)); + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | PKT3_WRITE_DATA_WR_CONFIRM | PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME)); - radeon_emit(cs, va & 0xFFFFFFFFUL); - radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL); - radeon_emit(cs, cs->cdw); - radeon_emit(cs, sscreen->b.cs_count); + radeon_emit(cs, sctx->trace_buf->gpu_address); + radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); + radeon_emit(cs, sctx->trace_id); + radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); + radeon_emit(cs, SI_ENCODE_TRACE_POINT(sctx->trace_id)); } |