diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 61 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 4 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 8 |
3 files changed, 72 insertions, 1 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 7ceab2d..78495ca 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -67,6 +67,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::f64, Promote); AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); + setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); + // XXX: This can be changed to Custom, once ExpandVectorStores can + // handle 64-bit stores. + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); + setOperationAction(ISD::LOAD, MVT::f32, Promote); AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); @@ -187,6 +194,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::STORE: return LowerVectorStore(Op, DAG); case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); } return Op; @@ -487,6 +495,59 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, return DAG.getMergeValues(Ops, 2, DL); } +SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op, + SelectionDAG &DAG) const { + StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); + EVT MemVT = Store->getMemoryVT(); + unsigned MemBits = MemVT.getSizeInBits(); + + // Byte stores are really expensive, so if possible, try to pack a + // 32-bit vector truncating store into an i32 store. 
+ // XXX: We could also optimize other vector bitwidths + if (!MemVT.isVector() || MemBits > 32) { + return SDValue(); + } + + SDLoc DL(Op); + const SDValue &Value = Store->getValue(); + EVT VT = Value.getValueType(); + const SDValue &Ptr = Store->getBasePtr(); + EVT MemEltVT = MemVT.getVectorElementType(); + unsigned MemEltBits = MemEltVT.getSizeInBits(); + unsigned MemNumElements = MemVT.getVectorNumElements(); + EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); + SDValue Mask; + switch(MemEltBits) { + case 8: + Mask = DAG.getConstant(0xFF, PackedVT); + break; + case 16: + Mask = DAG.getConstant(0xFFFF, PackedVT); + break; + default: + llvm_unreachable("Cannot lower this vector store"); + } + SDValue PackedValue; + for (unsigned i = 0; i < MemNumElements; ++i) { + EVT ElemVT = VT.getVectorElementType(); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, + DAG.getConstant(i, MVT::i32)); + Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); + Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); + SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); + Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); + if (i == 0) { + PackedValue = Elt; + } else { + PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); + } + } + return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, + MachinePointerInfo(Store->getMemOperand()->getValue()), + Store->isVolatile(), Store->isNonTemporal(), + Store->getAlignment()); +} + //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 8788c20..e3a0dcc 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -51,6 +51,10 @@ protected: void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl<ISD::InputArg> 
&Ins) const; + /// \brief Lower vector stores by merging the vector elements into an integer + /// of the same bitwidth. + SDValue LowerVectorStore(const SDValue &Op, SelectionDAG &DAG) const; + public: AMDGPUTargetLowering(TargetMachine &TM); diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index b6b6560..e10af2b 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1011,10 +1011,15 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = Op.getOperand(1); SDValue Ptr = Op.getOperand(2); + SDValue Result = AMDGPUTargetLowering::LowerVectorStore(Op, DAG); + if (Result.getNode()) { + return Result; + } + if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) { if (StoreNode->isTruncatingStore()) { EVT VT = Value.getValueType(); - assert(VT == MVT::i32); + assert(VT.bitsLE(MVT::i32)); EVT MemVT = StoreNode->getMemoryVT(); SDValue MaskConstant; if (MemVT == MVT::i8) { @@ -1571,6 +1576,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } } } + case AMDGPUISD::EXPORT: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) |