diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2013-08-14 23:25:00 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2013-08-14 23:25:00 +0000 |
commit | a41520cf9b9cefed2091a0624a34c5f7fdb42a68 (patch) | |
tree | 7e1d76112142ccb5f800e86a444e7e2b0ecc9304 /lib/Target | |
parent | e8e33f448e8830590c498ac5101ef8b27446ca3b (diff) | |
download | external_llvm-a41520cf9b9cefed2091a0624a34c5f7fdb42a68.zip external_llvm-a41520cf9b9cefed2091a0624a34c5f7fdb42a68.tar.gz external_llvm-a41520cf9b9cefed2091a0624a34c5f7fdb42a68.tar.bz2 |
R600/SI: Improve legalization of vector operations
This should fix hangs in the OpenCL piglit tests.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188431 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 49 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 5 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 6 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.h | 1 |
4 files changed, 56 insertions, 5 deletions
```diff
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 9bb487e..1e79998 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -79,8 +79,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::LOAD, MVT::f64, Promote);
   AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
 
-  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Expand);
-  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Expand);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
 
   setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
   setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
@@ -182,6 +184,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   // AMDGPU DAG lowering
+  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
   }
@@ -208,6 +212,47 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
   return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
 }
 
+void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+                                         SmallVectorImpl<SDValue> &Args,
+                                         unsigned Start,
+                                         unsigned Count) const {
+  EVT VT = Op.getValueType();
+  for (unsigned i = Start, e = Start + Count; i != e; ++i) {
+    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
+                               VT.getVectorElementType(),
+                               Op, DAG.getConstant(i, MVT::i32)));
+  }
+}
+
+SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SmallVector<SDValue, 8> Args;
+  SDValue A = Op.getOperand(0);
+  SDValue B = Op.getOperand(1);
+
+  ExtractVectorElements(A, DAG, Args, 0,
+                        A.getValueType().getVectorNumElements());
+  ExtractVectorElements(B, DAG, Args, 0,
+                        B.getValueType().getVectorNumElements());
+
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
+                     &Args[0], Args.size());
+}
+
+SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+
+  SmallVector<SDValue, 8> Args;
+  EVT VT = Op.getValueType();
+  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
+                        VT.getVectorNumElements());
+
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
+                     &Args[0], Args.size());
+}
+
+
 SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                       SelectionDAG &DAG) const {
   unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 5419e71..9adbb54 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -25,6 +25,11 @@ class MachineRegisterInfo;
 
 class AMDGPUTargetLowering : public TargetLowering {
 private:
+  void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+                             SmallVectorImpl<SDValue> &Args,
+                             unsigned Start, unsigned Count) const;
+  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 30a510d..0bd8bce 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -34,9 +34,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
   addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
 
-  addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass);
-  addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass);
-
   addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
   addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
 
@@ -110,6 +107,9 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
   return VT.bitsGT(MVT::i32);
 }
 
+bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
+  return VT.bitsLE(MVT::i8);
+}
 
 SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, SDLoc DL,
                                          SDValue Chain,
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 321e58c..9c54a6f 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -47,6 +47,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
 public:
   SITargetLowering(TargetMachine &tm);
   bool allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const;
+  virtual bool shouldSplitVectorElementType(EVT VT) const;
   SDValue LowerFormalArguments(SDValue Chain,
                                CallingConv::ID CallConv,
                                bool isVarArg,
```