diff options
author | Vincent Lejeune <vljn@ovi.com> | 2013-02-14 16:55:06 +0000 |
---|---|---|
committer | Vincent Lejeune <vljn@ovi.com> | 2013-02-14 16:55:06 +0000 |
commit | abfd5f6154b10cc5801bc9e1b8e8221df0113c68 (patch) | |
tree | c3371f25ffa5f7ba373dafdf93502dad986c7f5a /lib | |
parent | df65b0fb51d57a7e8dfcd19557b1d00c11c9fe2a (diff) | |
download | external_llvm-abfd5f6154b10cc5801bc9e1b8e8221df0113c68.zip external_llvm-abfd5f6154b10cc5801bc9e1b8e8221df0113c68.tar.gz external_llvm-abfd5f6154b10cc5801bc9e1b8e8221df0113c68.tar.bz2 |
R600: Fold zero/one in export instructions
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175181 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 112 | ||||
-rw-r--r-- | lib/Target/R600/R600Instructions.td | 20 | ||||
-rw-r--r-- | lib/Target/R600/R600Intrinsics.td | 3 |
3 files changed, 55 insertions, 80 deletions
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 8f4ec94..a7796b6 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -303,57 +303,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( using namespace llvm::Intrinsic; using namespace llvm::AMDGPUIntrinsic; -static SDValue -InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap, - unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type, - SDValue Scalar, SDValue Chain) { - if (!ExportMap[Slot]) { - SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, - DL, MVT::v4f32, - DAG.getUNDEF(MVT::v4f32), - Scalar, - DAG.getConstant(Channel, MVT::i32)); - - unsigned Mask = 1 << Channel; - - const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32), - DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32), - DAG.getConstant(Mask, MVT::i32)}; - - SDValue Res = DAG.getNode( - AMDGPUISD::EXPORT, - DL, - MVT::Other, - Ops, 6); - ExportMap[Slot] = Res.getNode(); - return Res; - } - - SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ; - SDValue PreviousVector = ExportInstruction->getOperand(1); - SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, - DL, MVT::v4f32, - PreviousVector, - Scalar, - DAG.getConstant(Channel, MVT::i32)); - - unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5)) - ->getZExtValue(); - Mask |= (1 << Channel); - - const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector, - DAG.getConstant(Inst, MVT::i32), - DAG.getConstant(Type, MVT::i32), - DAG.getConstant(Slot, MVT::i32), - DAG.getConstant(Mask, MVT::i32)}; - - DAG.UpdateNodeOperands(ExportInstruction, - Ops, 6); - - return Chain; - -} - SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); @@ -379,16 +328,19 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const MFI->LiveOuts.push_back(Reg); return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); } - case AMDGPUIntrinsic::R600_store_pixel_color: { - MachineFunction &MF = DAG.getMachineFunction(); - R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); - int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); - - SDNode **OutputsMap = MFI->Outputs; - return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, - RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), - Chain); - + case AMDGPUIntrinsic::R600_store_swizzle: { + const SDValue Args[8] = { + Chain, + Op.getOperand(2), // Export Value + Op.getOperand(3), // ArrayBase + Op.getOperand(4), // Type + DAG.getConstant(0, MVT::i32), // SWZ_X + DAG.getConstant(1, MVT::i32), // SWZ_Y + DAG.getConstant(2, MVT::i32), // SWZ_Z + DAG.getConstant(3, MVT::i32) // SWZ_W + }; + return DAG.getNode(AMDGPUISD::EXPORT, Op.getDebugLoc(), Op.getValueType(), + Args, 8); } // default for switch(IntrinsicID) @@ -1195,7 +1147,43 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, LHS.getOperand(2), LHS.getOperand(3), CCOpcode); - + } + case AMDGPUISD::EXPORT: { + SDValue Arg = N->getOperand(1); + if (Arg.getOpcode() != ISD::BUILD_VECTOR) + break; + SDValue NewBldVec[4] = { + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32), + DAG.getUNDEF(MVT::f32) + }; + SDValue NewArgs[8] = { + N->getOperand(0), // Chain + SDValue(), + N->getOperand(2), // ArrayBase + N->getOperand(3), // Type + N->getOperand(4), // SWZ_X + N->getOperand(5), // SWZ_Y + N->getOperand(6), // SWZ_Z + N->getOperand(7) // SWZ_W + }; + for (unsigned i = 0; i < Arg.getNumOperands(); i++) { + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg.getOperand(i))) { + if (C->isZero()) { + NewArgs[4 + i] = DAG.getConstant(4, MVT::i32); // SEL_0 + } else if (C->isExactlyValue(1.0)) { + NewArgs[4 + i] = DAG.getConstant(5, MVT::i32); // SEL_0 + } else { + NewBldVec[i] = Arg.getOperand(i); + } + } else { + NewBldVec[i] = Arg.getOperand(i); + } + } + DebugLoc DL = N->getDebugLoc(); + NewArgs[1] = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4f32, NewBldVec, 4); + return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8); } } return SDValue(); diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index c9885a3..286ec9b 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -540,7 +540,7 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; // Export Instructions //===----------------------------------------------------------------------===// -def ExportType : SDTypeProfile<0, 5, [SDTCisFP<0>, SDTCisInt<1>]>; +def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, [SDNPHasChain, SDNPSideEffect]>; @@ -612,22 +612,12 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { (v4f32 (IMPLICIT_DEF)), 1, 60, 7, 7, 7, 7, cf_inst, 0) >; - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 0), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, - 0, 1, 2, 3, cf_inst, 0) - >; - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 1), - (i32 imm:$type), (i32 imm:$arraybase), (i32 imm)), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, - 0, 1, 2, 3, cf_inst, 0) + def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), + (ExportInst R600_Reg128:$src, imm:$type, imm:$base, + imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) >; - def : Pat<(int_R600_store_swizzle (v4f32 R600_Reg128:$src), imm:$arraybase, - imm:$type), - (ExportInst R600_Reg128:$src, imm:$type, imm:$arraybase, - 0, 1, 2, 3, cf_inst, 0) - >; } multiclass SteamOutputExportPattern<Instruction ExportInst, diff --git a/lib/Target/R600/R600Intrinsics.td b/lib/Target/R600/R600Intrinsics.td index 284d4d8..b5e4f1e 100644 --- a/lib/Target/R600/R600Intrinsics.td +++ b/lib/Target/R600/R600Intrinsics.td @@ -18,11 +18,8 @@ let TargetPrefix = "R600", isTarget = 1 in { Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_R600_store_swizzle : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_R600_store_stream_output : Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>; - def int_R600_store_pixel_color : - Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; def int_R600_store_pixel_depth : Intrinsic<[], [llvm_float_ty], []>; def int_R600_store_pixel_stencil : |