summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/radeonsi/si_shader.c
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2016-09-10 00:29:31 +0200
committer: Marek Olšák <marek.olsak@amd.com> 2016-09-12 21:06:57 +0200
commit: 546bc07349d0e53b71766780cf726fe21f7d6983 (patch)
tree: ef256b4ae1740e37f2bd9a0703eeeef8db358edb /src/gallium/drivers/radeonsi/si_shader.c
parent: e2fb0441150e81f7ba30db1c8e8583858c3d8d5e (diff)
download: external_mesa3d-546bc07349d0e53b71766780cf726fe21f7d6983.zip
external_mesa3d-546bc07349d0e53b71766780cf726fe21f7d6983.tar.gz
external_mesa3d-546bc07349d0e53b71766780cf726fe21f7d6983.tar.bz2
radeonsi: don't preload constants at the beginning of shaders
LLVM can CSE the loads, thus we can always re-load constants before each use.
The decrease in SGPR spilling is huge.

The best improvements are the dumbest ones.

26011 shaders in 14651 tests
Totals:
SGPRS: 1453346 -> 1251920 (-13.86 %)
VGPRS: 742576 -> 728421 (-1.91 %)
Spilled SGPRs: 52298 -> 16644 (-68.17 %)
Spilled VGPRs: 397 -> 369 (-7.05 %)
Scratch VGPRs: 1372 -> 1344 (-2.04 %) dwords per thread
Code Size: 36136488 -> 36001064 (-0.37 %) bytes
LDS: 767 -> 767 (0.00 %) blocks
Max Waves: 219315 -> 222221 (1.33 %)

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c | 31
1 files changed, 11 insertions, 20 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0b7de18..6801722 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -107,7 +107,6 @@ struct si_shader_context
LLVMValueRef const_buffers[SI_NUM_CONST_BUFFERS];
LLVMValueRef lds;
- LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
LLVMValueRef shader_buffers[SI_NUM_SHADER_BUFFERS];
LLVMValueRef sampler_views[SI_NUM_SAMPLERS];
LLVMValueRef sampler_states[SI_NUM_SAMPLERS];
@@ -1881,12 +1880,19 @@ static LLVMValueRef fetch_constant(
idx = reg->Register.Index * 4 + swizzle;
if (!reg->Register.Indirect && !reg->Dimension.Indirect) {
+ LLVMValueRef c0, c1;
+
+ c0 = buffer_load_const(ctx, ctx->const_buffers[buf],
+ LLVMConstInt(ctx->i32, idx * 4, 0));
+
if (!tgsi_type_is_64bit(type))
- return bitcast(bld_base, type, ctx->constants[buf][idx]);
+ return bitcast(bld_base, type, c0);
else {
+ c1 = buffer_load_const(ctx, ctx->const_buffers[buf],
+ LLVMConstInt(ctx->i32,
+ (idx + 1) * 4, 0));
return radeon_llvm_emit_fetch_64bit(bld_base, type,
- ctx->constants[buf][idx],
- ctx->constants[buf][idx + 1]);
+ c0, c1);
}
}
@@ -5796,25 +5802,12 @@ static void preload_constants(struct si_shader_context *ctx)
LLVMValueRef ptr = LLVMGetParam(ctx->radeon_bld.main_fn, SI_PARAM_CONST_BUFFERS);
for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
- unsigned i, num_const = info->const_file_max[buf] + 1;
-
- if (num_const == 0)
+ if (info->const_file_max[buf] == -1)
continue;
- /* Allocate space for the constant values */
- ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
-
/* Load the resource descriptor */
ctx->const_buffers[buf] =
build_indexed_load_const(ctx, ptr, lp_build_const_int32(gallivm, buf));
-
- /* Load the constants, we rely on the code sinking to do the rest */
- for (i = 0; i < num_const * 4; ++i) {
- ctx->constants[buf][i] =
- buffer_load_const(ctx,
- ctx->const_buffers[buf],
- lp_build_const_int32(gallivm, i * 4));
- }
}
}
@@ -6905,8 +6898,6 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
}
out:
- for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
- FREE(ctx.constants[i]);
return r;
}