Diffstat (limited to 'lib/Target/R600/SIISelLowering.cpp')
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp  233
1 file changed, 172 insertions(+), 61 deletions(-)
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index d5d2b68..0b55411 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -14,6 +14,7 @@
 #include "SIISelLowering.h"
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "AMDILIntrinsicInfo.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
@@ -24,13 +25,10 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/Function.h"
 
-const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
-
 using namespace llvm;
 
 SITargetLowering::SITargetLowering(TargetMachine &TM) :
     AMDGPUTargetLowering(TM) {
-
   addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
   addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
 
@@ -76,7 +74,6 @@
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
 
-  setOperationAction(ISD::ADD, MVT::i64, Legal);
   setOperationAction(ISD::ADD, MVT::i32, Legal);
   setOperationAction(ISD::ADDC, MVT::i32, Legal);
   setOperationAction(ISD::ADDE, MVT::i32, Legal);
@@ -97,13 +94,18 @@
   setOperationAction(ISD::LOAD, MVT::i64, Custom);
   setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
   setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+  setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
+
+  setOperationAction(ISD::STORE, MVT::i1, Custom);
   setOperationAction(ISD::STORE, MVT::i32, Custom);
   setOperationAction(ISD::STORE, MVT::i64, Custom);
   setOperationAction(ISD::STORE, MVT::i128, Custom);
   setOperationAction(ISD::STORE, MVT::v2i32, Custom);
   setOperationAction(ISD::STORE, MVT::v4i32, Custom);
 
+  setOperationAction(ISD::SELECT, MVT::i64, Custom);
+  setOperationAction(ISD::SELECT, MVT::f64, Promote);
+  AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
+
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 
@@ -125,11 +127,20 @@
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
+  setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
 
+  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
+  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
@@ -137,10 +148,50 @@
   setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
 
   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
-  setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
 
-  setTargetDAGCombine(ISD::SELECT_CC);
+  // We only support LOAD/STORE and vector manipulation ops for vectors
+  // with > 4 elements.
+  MVT VecTypes[] = {
+    MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
+  };
+
+  const size_t NumVecTypes = array_lengthof(VecTypes);
+  for (unsigned Type = 0; Type < NumVecTypes; ++Type) {
+    for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
+      switch(Op) {
+      case ISD::LOAD:
+      case ISD::STORE:
+      case ISD::BUILD_VECTOR:
+      case ISD::BITCAST:
+      case ISD::EXTRACT_VECTOR_ELT:
+      case ISD::INSERT_VECTOR_ELT:
+      case ISD::CONCAT_VECTORS:
+      case ISD::INSERT_SUBVECTOR:
+      case ISD::EXTRACT_SUBVECTOR:
+        break;
+      default:
+        setOperationAction(Op, VecTypes[Type], Expand);
+        break;
+      }
+    }
+  }
+
+  for (int I = MVT::v1f64; I <= MVT::v8f64; ++I) {
+    MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I);
+    setOperationAction(ISD::FTRUNC, VT, Expand);
+    setOperationAction(ISD::FCEIL, VT, Expand);
+    setOperationAction(ISD::FFLOOR, VT, Expand);
+  }
+  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
+    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+  }
+
+  setTargetDAGCombine(ISD::SELECT_CC);
   setTargetDAGCombine(ISD::SETCC);
 
   setSchedulingPreference(Sched::RegPressure);
@@ -151,6 +202,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 //===----------------------------------------------------------------------===//
 
 bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                     unsigned AddrSpace,
                                                      bool *IsFast) const {
   // XXX: This depends on the address space and also we may want to revist
   // the alignment values we specify in the DataLayout.
@@ -159,8 +211,15 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
   return VT.bitsGT(MVT::i32);
 }
 
-bool SITargetLowering::shouldSplitVectorElementType(EVT VT) const {
-  return VT.bitsLE(MVT::i16);
+bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
+  return VT.getScalarType().bitsLE(MVT::i16);
+}
+
+bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+                                                         Type *Ty) const {
+  const SIInstrInfo *TII =
+    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+  return TII->isInlineConstant(Imm);
 }
 
 SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
@@ -346,16 +405,16 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
       static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
     MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
     unsigned SuperReg = MI->getOperand(0).getReg();
-    unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-    unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
-    unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-    unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
+    unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
+    unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+    unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
             .addOperand(MI->getOperand(1));
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
             .addImm(0);
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
-            .addImm(RSRC_DATA_FORMAT >> 32);
+            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
     BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
             .addReg(SubRegHiLo)
             .addImm(AMDGPU::sub0)
@@ -439,13 +498,14 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
   switch (Op.getOpcode()) {
   default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
-  case ISD::ADD: return LowerADD(Op, DAG);
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::LOAD: {
     LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
-    if ((Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
-         Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
-        Op.getValueType().isVector()) {
+    if (Op.getValueType().isVector() &&
+        (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+         Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
+         (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+          Op.getValueType().getVectorNumElements() > 4))) {
       SDValue MergedValues[2] = {
         SplitVectorLoad(Op, DAG),
         Load->getChain()
@@ -456,6 +516,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  case ISD::SELECT: return LowerSELECT(Op, DAG);
   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
   case ISD::STORE: return LowerSTORE(Op, DAG);
@@ -576,33 +637,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
-SDValue SITargetLowering::LowerADD(SDValue Op,
-                                   SelectionDAG &DAG) const {
-  if (Op.getValueType() != MVT::i64)
-    return SDValue();
-
-  SDLoc DL(Op);
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-
-  SDValue Zero = DAG.getConstant(0, MVT::i32);
-  SDValue One = DAG.getConstant(1, MVT::i32);
-
-  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, Zero);
-  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS, One);
-
-  SDValue Lo1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, Zero);
-  SDValue Hi1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS, One);
-
-  SDVTList VTList = DAG.getVTList(MVT::i32, MVT::Glue);
-
-  SDValue AddLo = DAG.getNode(ISD::ADDC, DL, VTList, Lo0, Lo1);
-  SDValue Carry = AddLo.getValue(1);
-  SDValue AddHi = DAG.getNode(ISD::ADDE, DL, VTList, Hi0, Hi1, Carry);
-
-  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, AddLo, AddHi.getValue(0));
-}
-
 /// \brief Helper function for LowerBRCOND
 static SDNode *findUser(SDValue Value, unsigned Opcode) {
@@ -700,23 +734,42 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
+  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+  SDValue MergedValues[2];
+  MergedValues[1] = Load->getChain();
+  if (Ret.getNode()) {
+    MergedValues[0] = Ret;
+    return DAG.getMergeValues(MergedValues, 2, DL);
+  }
 
-  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
     return SDValue();
+  }
+
+  EVT MemVT = Load->getMemoryVT();
+
+  assert(!MemVT.isVector() && "Private loads should be scalarized");
+  assert(!MemVT.isFloatingPoint() && "FP loads should be promoted to int");
 
-  SDValue TruncPtr = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
-                                 Load->getBasePtr(), DAG.getConstant(0, MVT::i32));
-  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
                             DAG.getConstant(2, MVT::i32));
+  Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+                    Load->getChain(), Ptr,
+                    DAG.getTargetConstant(0, MVT::i32),
+                    Op.getOperand(2));
+  if (MemVT.getSizeInBits() == 64) {
+    SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
+                                 DAG.getConstant(1, MVT::i32));
+
+    SDValue LoadUpper = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+                                    Load->getChain(), IncPtr,
+                                    DAG.getTargetConstant(0, MVT::i32),
+                                    Op.getOperand(2));
+
+    Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ret, LoadUpper);
+  }
 
-  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
-                            Load->getChain(), Ptr,
-                            DAG.getTargetConstant(0, MVT::i32),
-                            Op.getOperand(2));
-  SDValue MergedValues[2] = {
-    Ret,
-    Load->getChain()
-  };
+  MergedValues[0] = Ret;
 
   return DAG.getMergeValues(MergedValues, 2, DL);
 }
@@ -744,6 +797,33 @@ SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
                      Op.getOperand(4));
 }
 
+SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+  if (Op.getValueType() != MVT::i64)
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue Cond = Op.getOperand(0);
+
+  SDValue Zero = DAG.getConstant(0, MVT::i32);
+  SDValue One = DAG.getConstant(1, MVT::i32);
+
+  SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
+  SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
+
+  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
+  SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
+
+  SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
+
+  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
+  SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
+
+  SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
+
+  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, Lo, Hi);
+  return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
+}
+
 SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
@@ -790,16 +870,47 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   if (VT.isVector() && VT.getVectorNumElements() >= 8)
       return SplitVectorStore(Op, DAG);
 
+  if (VT == MVT::i1)
+    return DAG.getTruncStore(Store->getChain(), DL,
+        DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
+        Store->getBasePtr(), MVT::i1, Store->getMemOperand());
+
   if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
     return SDValue();
 
-  SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
-  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Store->getBasePtr(),
                             DAG.getConstant(2, MVT::i32));
   SDValue Chain = Store->getChain();
   SmallVector<SDValue, 8> Values;
 
-  if (VT == MVT::i64) {
+  if (Store->isTruncatingStore()) {
+    unsigned Mask = 0;
+    if (Store->getMemoryVT() == MVT::i8) {
+      Mask = 0xff;
+    } else if (Store->getMemoryVT() == MVT::i16) {
+      Mask = 0xffff;
+    }
+    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+                              Chain, Store->getBasePtr(),
+                              DAG.getConstant(0, MVT::i32));
+    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(),
+                                  DAG.getConstant(0x3, MVT::i32));
+    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+                                   DAG.getConstant(3, MVT::i32));
+    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(),
+                                      DAG.getConstant(Mask, MVT::i32));
+    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+                                       MaskedValue, ShiftAmt);
+    SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32,
+                                  DAG.getConstant(32, MVT::i32), ShiftAmt);
+    SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32,
+                                  DAG.getConstant(Mask, MVT::i32),
+                                  RotrAmt);
+    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+    Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+
+    Values.push_back(Dst);
+  } else if (VT == MVT::i64) {
     for (unsigned i = 0; i < 2; ++i) {
       Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                        Store->getValue(), DAG.getConstant(i, MVT::i32)));
@@ -1047,7 +1158,7 @@ void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
   else
     return;
 
-  // Nothing todo if they fit naturaly
+  // Nothing to do if they fit naturally
   if (fitsRegClass(DAG, Operand, RegClass))
     return;
