radeonsi: fix BFE/BFI lowering for GLSL semantics

Fixes spec/arb_gpu_shader5/execution/built-in-functions/*-bitfield{Extract,Insert}

Cc: 13.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
(cherry picked from commit 5aef14932ac047dc5f1af311a26b7f41b140d79f)
author: Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-10-27 10:59:37 +0200
committer: Emil Velikov <emil.l.velikov@gmail.com> 2016-11-09 13:11:29 +0000
commit: ea07a57fc0092ae1111df49f76f48f61a27544b9 (patch)
tree: 086f9da00bcc65178d0c48db0f80c72eb58e953c
parent: 620ef8e7429e47d243db2dfb477567f14520bcb8 (diff)
download: external_mesa3d-ea07a57fc0092ae1111df49f76f48f61a27544b9.zip
external_mesa3d-ea07a57fc0092ae1111df49f76f48f61a27544b9.tar.gz
external_mesa3d-ea07a57fc0092ae1111df49f76f48f61a27544b9.tar.bz2
1 file changed, 34 insertions, 3 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 123ff5d..18e905b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -459,6 +459,8 @@ static void emit_bfi(const struct lp_build_tgsi_action *action,
 	struct gallivm_state *gallivm = bld_base->base.gallivm;
 	LLVMBuilderRef builder = gallivm->builder;
 	LLVMValueRef bfi_args[3];
+	LLVMValueRef bfi_sm5;
+	LLVMValueRef cond;
 
 	// Calculate the bitmask: (((1 << src3) - 1) << src2
 	bfi_args[0] = LLVMBuildShl(builder,
@@ -478,11 +480,40 @@ static void emit_bfi(const struct lp_build_tgsi_action *action,
 	 *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
 	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
 	 */
-	emit_data->output[emit_data->chan] =
+	bfi_sm5 =
 		LLVMBuildXor(builder, bfi_args[2],
 			LLVMBuildAnd(builder, bfi_args[0],
 				LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
 					     ""), ""), "");
+
+	/* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
+	 * uses the convenient V_BFI lowering for the above, which follows SM5
+	 * and disagrees with GLSL semantics when bits (src3) is 32.
+	 */
+	cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
+			     lp_build_const_int32(gallivm, 32), "");
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
+}
+
+static void emit_bfe(const struct lp_build_tgsi_action *action,
+		     struct lp_build_tgsi_context *bld_base,
+		     struct lp_build_emit_data *emit_data)
+{
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	LLVMValueRef bfe_sm5;
+	LLVMValueRef cond;
+
+	bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
+				     emit_data->dst_type, emit_data->args,
+				     emit_data->arg_count, LLVMReadNoneAttribute);
+
+	/* Correct for GLSL semantics. */
+	cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
+			     lp_build_const_int32(gallivm, 32), "");
+	emit_data->output[emit_data->chan] =
+		LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
 }
 
 /* this is ffs in C */
@@ -783,7 +814,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
 	bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
 	bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
 	bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
-	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
+	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
 	bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
 	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
 	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
@@ -835,7 +866,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
 	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
 	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
-	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
+	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
 	bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
 	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
 	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
author	Nicolai Hähnle <nicolai.haehnle@amd.com>	2016-10-27 10:59:37 +0200
committer	Emil Velikov <emil.l.velikov@gmail.com>	2016-11-09 13:11:29 +0000
commit	ea07a57fc0092ae1111df49f76f48f61a27544b9 (patch)
tree	086f9da00bcc65178d0c48db0f80c72eb58e953c
parent	620ef8e7429e47d243db2dfb477567f14520bcb8 (diff)
download	external_mesa3d-ea07a57fc0092ae1111df49f76f48f61a27544b9.zip external_mesa3d-ea07a57fc0092ae1111df49f76f48f61a27544b9.tar.gz external_mesa3d-ea07a57fc0092ae1111df49f76f48f61a27544b9.tar.bz2