summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi/si_shader.c
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2016-09-13 17:33:23 +0200
committer: Marek Olšák <marek.olsak@amd.com> 2016-09-14 12:33:00 +0200
commit: ab29788250a705eb0dd517cb3d38f37f944eb8ad (patch)
tree: 5b14ecf4f70cc117e4c8e6b47c8c0209a7f4559d /src/gallium/drivers/radeonsi/si_shader.c
parent: 007b512f9d05875b0dda956230fd3882dfea30af (diff)
download: external_mesa3d-ab29788250a705eb0dd517cb3d38f37f944eb8ad.zip
download: external_mesa3d-ab29788250a705eb0dd517cb3d38f37f944eb8ad.tar.gz
download: external_mesa3d-ab29788250a705eb0dd517cb3d38f37f944eb8ad.tar.bz2
radeonsi: reload PS inputs with direct indexing at each use (v2)
The LLVM compiler can CSE interp intrinsics thanks to LLVMReadNoneAttribute.

26011 shaders in 14651 tests
Totals:
SGPRS: 1146340 -> 1132676 (-1.19 %)
VGPRS: 727371 -> 711730 (-2.15 %)
Spilled SGPRs: 2218 -> 2078 (-6.31 %)
Spilled VGPRs: 369 -> 369 (0.00 %)
Scratch VGPRs: 1344 -> 1344 (0.00 %) dwords per thread
Code Size: 35841268 -> 36009732 (0.47 %) bytes
LDS: 767 -> 767 (0.00 %) blocks
Max Waves: 222559 -> 224779 (1.00 %)
Wait states: 0 -> 0 (0.00 %)

v2: don't call load_input for fragment shaders in emit_declaration

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c | 27
1 files changed, 11 insertions, 16 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b034837..3ccff7a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -440,7 +440,8 @@ static LLVMValueRef get_instance_index_for_fetch(
static void declare_input_vs(
struct radeon_llvm_context *radeon_bld,
unsigned input_index,
- const struct tgsi_full_declaration *decl)
+ const struct tgsi_full_declaration *decl,
+ LLVMValueRef out[4])
{
struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
struct gallivm_state *gallivm = base->gallivm;
@@ -498,11 +499,8 @@ static void declare_input_vs(
/* Break up the vec4 into individual components */
for (chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
- /* XXX: Use a helper function for this. There is one in
- * tgsi_llvm.c. */
- ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
- LLVMBuildExtractElement(gallivm->builder,
- input, llvm_chan, "");
+ out[chan] = LLVMBuildExtractElement(gallivm->builder,
+ input, llvm_chan, "");
}
}
@@ -1463,7 +1461,8 @@ static LLVMValueRef get_interp_param(struct si_shader_context *ctx,
static void declare_input_fs(
struct radeon_llvm_context *radeon_bld,
unsigned input_index,
- const struct tgsi_full_declaration *decl)
+ const struct tgsi_full_declaration *decl,
+ LLVMValueRef out[4])
{
struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
struct si_shader_context *ctx =
@@ -1482,14 +1481,10 @@ static void declare_input_fs(
unsigned offset = SI_PARAM_POS_FIXED_PT + 1 +
(i ? util_bitcount(colors_read & 0xf) : 0);
- radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
- mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
- radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
- mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
- radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
- mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
- radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
- mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : base->undef;
+ out[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : base->undef;
return;
}
@@ -1513,7 +1508,7 @@ static void declare_input_fs(
shader->selector->info.colors_read, interp_param,
LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK),
LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE),
- &radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)]);
+ &out[0]);
}
static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)