aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2013-08-26 15:05:44 +0000
committerTom Stellard <thomas.stellard@amd.com>2013-08-26 15:05:44 +0000
commit7a0282daeb214f14d75249cc2d90302c44586c4e (patch)
tree31054dc82b33f196fd1399d99d84f5a512b91acb /lib
parentda25cd3e6de8f21005590c2de49868f883cf2410 (diff)
downloadexternal_llvm-7a0282daeb214f14d75249cc2d90302c44586c4e.zip
external_llvm-7a0282daeb214f14d75249cc2d90302c44586c4e.tar.gz
external_llvm-7a0282daeb214f14d75249cc2d90302c44586c4e.tar.bz2
R600: Add support for v4i32 and v2i32 local stores
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189222 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp152
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h12
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp2
3 files changed, 107 insertions, 59 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 56a9a2b..9df835f 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -67,6 +67,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
+ // Custom lowering of vector stores is required for local address space
+ // stores.
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ // XXX: Native v2i32 local address space stores are possible, but not
+ // currently implemented.
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
@@ -221,7 +228,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::STORE: return LowerVectorStore(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
}
return Op;
@@ -417,7 +424,98 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
return Op;
}
+SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+ EVT MemVT = Store->getMemoryVT();
+ unsigned MemBits = MemVT.getSizeInBits();
+
+ // Byte stores are really expensive, so if possible, try to pack
+ // 32-bit vector truncatating store into an i32 store.
+ // XXX: We could also handle optimize other vector bitwidths
+ if (!MemVT.isVector() || MemBits > 32) {
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ const SDValue &Value = Store->getValue();
+ EVT VT = Value.getValueType();
+ const SDValue &Ptr = Store->getBasePtr();
+ EVT MemEltVT = MemVT.getVectorElementType();
+ unsigned MemEltBits = MemEltVT.getSizeInBits();
+ unsigned MemNumElements = MemVT.getVectorNumElements();
+ EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ SDValue Mask;
+ switch(MemEltBits) {
+ case 8:
+ Mask = DAG.getConstant(0xFF, PackedVT);
+ break;
+ case 16:
+ Mask = DAG.getConstant(0xFFFF, PackedVT);
+ break;
+ default:
+ llvm_unreachable("Cannot lower this vector store");
+ }
+ SDValue PackedValue;
+ for (unsigned i = 0; i < MemNumElements; ++i) {
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
+ DAG.getConstant(i, MVT::i32));
+ Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
+ Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
+ SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
+ Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
+ if (i == 0) {
+ PackedValue = Elt;
+ } else {
+ PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
+ }
+ }
+ return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment());
+}
+
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
+ EVT EltVT = Store->getValue().getValueType().getVectorElementType();
+ EVT PtrVT = Store->getBasePtr().getValueType();
+ unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
+ SDLoc SL(Op);
+
+ SmallVector<SDValue, 8> Chains;
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Store->getValue(), DAG.getConstant(i, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
+ Store->getBasePtr(),
+ DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
+ PtrVT));
+ Chains.push_back(DAG.getStore(Store->getChain(), SL, Val, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment()));
+ }
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
+}
+
+SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ Store->getValue().getValueType().isVector()) {
+ return SplitVectorStore(Op, DAG);
+ }
+ return SDValue();
+}
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
@@ -524,58 +622,6 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op,
- SelectionDAG &DAG) const {
- StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
- EVT MemVT = Store->getMemoryVT();
- unsigned MemBits = MemVT.getSizeInBits();
-
- // Byte stores are really expensive, so if possible, try to pack
- // 32-bit vector truncatating store into an i32 store.
- // XXX: We could also handle optimize other vector bitwidths
- if (!MemVT.isVector() || MemBits > 32) {
- return SDValue();
- }
-
- SDLoc DL(Op);
- const SDValue &Value = Store->getValue();
- EVT VT = Value.getValueType();
- const SDValue &Ptr = Store->getBasePtr();
- EVT MemEltVT = MemVT.getVectorElementType();
- unsigned MemEltBits = MemEltVT.getSizeInBits();
- unsigned MemNumElements = MemVT.getVectorNumElements();
- EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
- SDValue Mask;
- switch(MemEltBits) {
- case 8:
- Mask = DAG.getConstant(0xFF, PackedVT);
- break;
- case 16:
- Mask = DAG.getConstant(0xFFFF, PackedVT);
- break;
- default:
- llvm_unreachable("Cannot lower this vector store");
- }
- SDValue PackedValue;
- for (unsigned i = 0; i < MemNumElements; ++i) {
- EVT ElemVT = VT.getVectorElementType();
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
- DAG.getConstant(i, MVT::i32));
- Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
- Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
- SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
- Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
- if (i == 0) {
- PackedValue = Elt;
- } else {
- PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
- }
- }
- return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
- MachinePointerInfo(Store->getMemOperand()->getValue()),
- Store->isVolatile(), Store->isNonTemporal(),
- Store->getAlignment());
-}
//===----------------------------------------------------------------------===//
// Helper functions
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index e3a0dcc..f739aed 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -31,6 +31,12 @@ private:
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Lower vector stores by merging the vector elements into an integer
+ /// of the same bitwidth.
+ SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
+ /// \brief Split a vector store into multiple scalar stores.
+ /// \returns The resulting chain.
+ SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
protected:
@@ -44,17 +50,13 @@ protected:
unsigned Reg, EVT VT) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
-
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
- /// \brief Lower vector stores by merging the vector elements into an integer
- /// of the same bitwidth.
- SDValue LowerVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
-
public:
AMDGPUTargetLowering(TargetMachine &TM);
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index b822431..e846ff4 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1002,7 +1002,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Value = Op.getOperand(1);
SDValue Ptr = Op.getOperand(2);
- SDValue Result = AMDGPUTargetLowering::LowerVectorStore(Op, DAG);
+ SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
if (Result.getNode()) {
return Result;
}