aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp61
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h4
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp8
3 files changed, 72 insertions, 1 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 7ceab2d..78495ca 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -67,6 +67,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
+ // XXX: This can be change to Custom, once ExpandVectorStores can
+ // handle 64-bit stores.
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
+
setOperationAction(ISD::LOAD, MVT::f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
@@ -187,6 +194,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::STORE: return LowerVectorStore(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
}
return Op;
@@ -487,6 +495,59 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, 2, DL);
}
+SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+ EVT MemVT = Store->getMemoryVT();
+ unsigned MemBits = MemVT.getSizeInBits();
+
+ // Byte stores are really expensive, so if possible, try to pack
+ // 32-bit vector truncatating store into an i32 store.
+ // XXX: We could also handle optimize other vector bitwidths
+ if (!MemVT.isVector() || MemBits > 32) {
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ const SDValue &Value = Store->getValue();
+ EVT VT = Value.getValueType();
+ const SDValue &Ptr = Store->getBasePtr();
+ EVT MemEltVT = MemVT.getVectorElementType();
+ unsigned MemEltBits = MemEltVT.getSizeInBits();
+ unsigned MemNumElements = MemVT.getVectorNumElements();
+ EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ SDValue Mask;
+ switch(MemEltBits) {
+ case 8:
+ Mask = DAG.getConstant(0xFF, PackedVT);
+ break;
+ case 16:
+ Mask = DAG.getConstant(0xFFFF, PackedVT);
+ break;
+ default:
+ llvm_unreachable("Cannot lower this vector store");
+ }
+ SDValue PackedValue;
+ for (unsigned i = 0; i < MemNumElements; ++i) {
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
+ DAG.getConstant(i, MVT::i32));
+ Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
+ Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
+ SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
+ Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
+ if (i == 0) {
+ PackedValue = Elt;
+ } else {
+ PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
+ }
+ }
+ return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment());
+}
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 8788c20..e3a0dcc 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -51,6 +51,10 @@ protected:
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
+ /// \brief Lower vector stores by merging the vector elements into an integer
+ /// of the same bitwidth.
+ SDValue LowerVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
+
public:
AMDGPUTargetLowering(TargetMachine &TM);
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index b6b6560..e10af2b 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1011,10 +1011,15 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue Value = Op.getOperand(1);
SDValue Ptr = Op.getOperand(2);
+ SDValue Result = AMDGPUTargetLowering::LowerVectorStore(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
+
if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
if (StoreNode->isTruncatingStore()) {
EVT VT = Value.getValueType();
- assert(VT == MVT::i32);
+ assert(VT.bitsLE(MVT::i32));
EVT MemVT = StoreNode->getMemoryVT();
SDValue MaskConstant;
if (MemVT == MVT::i8) {
@@ -1571,6 +1576,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+
case AMDGPUISD::EXPORT: {
SDValue Arg = N->getOperand(1);
if (Arg.getOpcode() != ISD::BUILD_VECTOR)