aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
author: Tom Stellard <thomas.stellard@amd.com> 2013-08-14 23:25:00 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2013-08-14 23:25:00 +0000
commit: a41520cf9b9cefed2091a0624a34c5f7fdb42a68 (patch)
tree: 7e1d76112142ccb5f800e86a444e7e2b0ecc9304 /lib/Target
parent: e8e33f448e8830590c498ac5101ef8b27446ca3b (diff)
download: external_llvm-a41520cf9b9cefed2091a0624a34c5f7fdb42a68.zip
external_llvm-a41520cf9b9cefed2091a0624a34c5f7fdb42a68.tar.gz
external_llvm-a41520cf9b9cefed2091a0624a34c5f7fdb42a68.tar.bz2
R600/SI: Improve legalization of vector operations
This should fix hangs in the OpenCL piglit tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188431 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- lib/Target/R600/AMDGPUISelLowering.cpp 49
-rw-r--r-- lib/Target/R600/AMDGPUISelLowering.h 5
-rw-r--r-- lib/Target/R600/SIISelLowering.cpp 6
-rw-r--r-- lib/Target/R600/SIISelLowering.h 1
4 files changed, 56 insertions, 5 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 9bb487e..1e79998 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -79,8 +79,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Expand);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Expand);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
@@ -182,6 +184,8 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
// AMDGPU DAG lowering
+ case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
}
@@ -208,6 +212,47 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
}
+void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Args, // out: one extracted scalar node is appended per element
+ unsigned Start, // index of the first element of Op to extract
+ unsigned Count) const { // number of consecutive elements to extract
+ EVT VT = Op.getValueType(); // Op's type; its element type is the type of every extracted node
+ for (unsigned i = Start, e = Start + Count; i != e; ++i) { // indices [Start, Start + Count); no bounds check against Op's element count here
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
+ VT.getVectorElementType(),
+ Op, DAG.getConstant(i, MVT::i32))); // the element index is passed as an i32 constant
+ }
+}
+
+SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+ SelectionDAG &DAG) const { // Rewrites the concatenation as a BUILD_VECTOR of the operands' individual elements.
+ SmallVector<SDValue, 8> Args; // scalar elements, collected in result order
+ SDValue A = Op.getOperand(0);
+ SDValue B = Op.getOperand(1); // NOTE(review): only operands 0 and 1 are read; confirm no node with more operands reaches here
+
+ ExtractVectorElements(A, DAG, Args, 0, // every element of A, starting at index 0
+ A.getValueType().getVectorNumElements());
+ ExtractVectorElements(B, DAG, Args, 0, // then every element of B, appended after A's
+ B.getValueType().getVectorNumElements());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), // result keeps the value type of the original node
+ &Args[0], Args.size());
+}
+
+SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const { // Rewrites the subvector extraction as a BUILD_VECTOR of individually extracted elements.
+
+ SmallVector<SDValue, 8> Args; // scalar elements of the resulting subvector
+ EVT VT = Op.getValueType(); // result (subvector) type; supplies the element count below
+ unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); // operand 1 is read as a constant start index via cast<ConstantSDNode>
+ ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, // operand 0 is the source vector
+ VT.getVectorNumElements());
+
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), // result keeps the value type of the original node
+ &Args[0], Args.size());
+}
+
+
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 5419e71..9adbb54 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -25,6 +25,11 @@ class MachineRegisterInfo;
class AMDGPUTargetLowering : public TargetLowering {
private:
+ void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start, unsigned Count) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 30a510d..0bd8bce 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -34,9 +34,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v2i1, &AMDGPU::VReg_64RegClass);
- addRegisterClass(MVT::v4i1, &AMDGPU::VReg_128RegClass);
-
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
@@ -110,6 +107,9 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return VT.bitsGT(MVT::i32);
}
+bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const { // SI implementation of the hook declared virtual in SIISelLowering.h
+ return VT.bitsLE(MVT::i8); // true when VT.bitsLE(MVT::i8) holds, i.e. presumably for types no wider than i8 (confirm bitsLE semantics)
+}
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT,
SDLoc DL, SDValue Chain,
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 321e58c..9c54a6f 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -47,6 +47,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
public:
SITargetLowering(TargetMachine &tm);
bool allowsUnalignedMemoryAccesses(EVT VT, bool *IsFast) const;
+ virtual bool shouldSplitVectorElementType(EVT VT) const;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,