summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/radeonsi/si_shader.c
diff options
context:
space:
mode:
author Marek Olšák <marek.olsak@amd.com> 2016-07-16 13:41:54 +0200
committer Marek Olšák <marek.olsak@amd.com> 2016-07-19 23:45:06 +0200
commit 2596ae2b6eb11bd70f147390126258e76adb51d2 (patch)
tree 40b71f341f3f7d1a06d05176b0db8f2adce06919 /src/gallium/drivers/radeonsi/si_shader.c
parent b2b45cecef33b0212037e53bf229e14293d0f4a0 (diff)
download external_mesa3d-2596ae2b6eb11bd70f147390126258e76adb51d2.zip
external_mesa3d-2596ae2b6eb11bd70f147390126258e76adb51d2.tar.gz
external_mesa3d-2596ae2b6eb11bd70f147390126258e76adb51d2.tar.bz2
radeonsi: emit PS exports last
This effectively removes s_waitcnt instructions after FP16 exports.

Before:
  v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
  v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
  exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100
  s_waitcnt expcnt(0) ; BF8C0F0F
  v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04
  v_cvt_pkrtz_f16_f32_e32 v1, v6, v7 ; 5E020F06
  exp 15, 1, 1, 0, 0, v0, v1, v0, v0 ; F800041F 00000100
  s_waitcnt expcnt(0) ; BF8C0F0F
  v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308
  v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A
  exp 15, 2, 1, 0, 0, v0, v1, v0, v0 ; F800042F 00000100
  s_waitcnt expcnt(0) ; BF8C0F0F
  v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C
  v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E
  exp 15, 3, 1, 1, 1, v0, v1, v0, v0 ; F8001C3F 00000100
  s_endpgm ; BF810000

After:
  v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
  v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
  v_cvt_pkrtz_f16_f32_e32 v2, v4, v5 ; 5E040B04
  v_cvt_pkrtz_f16_f32_e32 v3, v6, v7 ; 5E060F06
  exp 15, 0, 1, 0, 0, v0, v1, v0, v0 ; F800040F 00000100
  v_cvt_pkrtz_f16_f32_e32 v4, v8, v9 ; 5E081308
  v_cvt_pkrtz_f16_f32_e32 v5, v10, v11 ; 5E0A170A
  exp 15, 1, 1, 0, 0, v2, v3, v0, v0 ; F800041F 00000302
  v_cvt_pkrtz_f16_f32_e32 v6, v12, v13 ; 5E0C1B0C
  v_cvt_pkrtz_f16_f32_e32 v7, v14, v15 ; 5E0E1F0E
  exp 15, 2, 1, 0, 0, v4, v5, v0, v0 ; F800042F 00000504
  exp 15, 3, 1, 1, 1, v6, v7, v0, v0 ; F8001C3F 00000706
  s_endpgm ; BF810000

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Diffstat (limited to 'src/gallium/drivers/radeonsi/si_shader.c')
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 44
1 files changed, 31 insertions, 13 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 41bcbd4..adf706c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2917,9 +2917,14 @@ static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
FREE(outputs);
}
+struct si_ps_exports {
+ unsigned num;
+ LLVMValueRef args[10][9];
+};
+
static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
- LLVMValueRef depth, LLVMValueRef stencil,
- LLVMValueRef samplemask)
+ LLVMValueRef depth, LLVMValueRef stencil,
+ LLVMValueRef samplemask, struct si_ps_exports *exp)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
@@ -2965,14 +2970,13 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
/* Specify which components to enable */
args[0] = lp_build_const_int32(base->gallivm, mask);
- lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- ctx->voidt, args, 9, 0);
+ memcpy(exp->args[exp->num++], args, sizeof(args));
}
static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
LLVMValueRef *color, unsigned index,
unsigned samplemask_param,
- bool is_last)
+ bool is_last, struct si_ps_exports *exp)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct lp_build_context *base = &bld_base->base;
@@ -3018,8 +3022,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
} else if (args[c][0] == bld_base->uint_bld.zero)
continue; /* unnecessary NULL export */
- lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- ctx->voidt, args[c], 9, 0);
+ memcpy(exp->args[exp->num++], args[c], sizeof(args[c]));
}
} else {
LLVMValueRef args[9];
@@ -3033,11 +3036,19 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
} else if (args[0] == bld_base->uint_bld.zero)
return; /* unnecessary NULL export */
- lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- ctx->voidt, args, 9, 0);
+ memcpy(exp->args[exp->num++], args, sizeof(args));
}
}
+static void si_emit_ps_exports(struct si_shader_context *ctx,
+ struct si_ps_exports *exp)
+{
+ for (unsigned i = 0; i < exp->num; i++)
+ lp_build_intrinsic(ctx->radeon_bld.gallivm.builder,
+ "llvm.SI.export", ctx->voidt,
+ exp->args[i], 9, 0);
+}
+
static void si_export_null(struct lp_build_tgsi_context *bld_base)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
@@ -3069,6 +3080,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
int last_color_export = -1;
int i;
+ struct si_ps_exports exp = {};
/* Determine the last export. If MRTZ is present, it's always last.
* Otherwise, find the last color export.
@@ -3135,7 +3147,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
si_export_mrt_color(bld_base, color, semantic_index,
SI_PARAM_SAMPLE_COVERAGE,
- last_color_export == i);
+ last_color_export == i, &exp);
break;
default:
fprintf(stderr,
@@ -3145,7 +3157,9 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context *bld_base)
}
if (depth || stencil || samplemask)
- si_export_mrt_z(bld_base, depth, stencil, samplemask);
+ si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
+
+ si_emit_ps_exports(ctx, &exp);
}
/**
@@ -7495,6 +7509,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
int last_sgpr, num_params, i;
bool status = true;
+ struct si_ps_exports exp = {};
si_init_shader_ctx(&ctx, sscreen, &shader, tm);
ctx.type = PIPE_SHADER_FRAGMENT;
@@ -7564,7 +7579,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
si_export_mrt_color(bld_base, color, mrt,
num_params - 1,
- mrt == last_color_export);
+ mrt == last_color_export, &exp);
}
/* Process depth, stencil, samplemask. */
@@ -7576,10 +7591,13 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
samplemask = LLVMGetParam(ctx.radeon_bld.main_fn, vgpr++);
if (depth || stencil || samplemask)
- si_export_mrt_z(bld_base, depth, stencil, samplemask);
+ si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp);
else if (last_color_export == -1)
si_export_null(bld_base);
+ if (exp.num)
+ si_emit_ps_exports(&ctx, &exp);
+
/* Compile. */
LLVMBuildRetVoid(gallivm->builder);
radeon_llvm_finalize_module(&ctx.radeon_bld);