diff options
Diffstat (limited to 'lib/Target/R600/AMDGPUISelLowering.cpp')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 744 |
1 files changed, 566 insertions, 178 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 183725c..6c443ea 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -28,8 +28,50 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" using namespace llvm; + +namespace { + +/// Diagnostic information for unimplemented or unsupported feature reporting. +class DiagnosticInfoUnsupported : public DiagnosticInfo { +private: + const Twine &Description; + const Function &Fn; + + static int KindID; + + static int getKindID() { + if (KindID == 0) + KindID = llvm::getNextAvailablePluginDiagnosticKind(); + return KindID; + } + +public: + DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(getKindID(), Severity), + Description(Desc), + Fn(Fn) { } + + const Function &getFunction() const { return Fn; } + const Twine &getDescription() const { return Description; } + + void print(DiagnosticPrinter &DP) const override { + DP << "unsupported " << getDescription() << " in " << Fn.getName(); + } + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == getKindID(); + } +}; + +int DiagnosticInfoUnsupported::KindID = 0; +} + + static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { @@ -88,6 +130,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::f64, Promote); AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + setOperationAction(ISD::STORE, MVT::v2f64, Promote); + AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v2i64); + // Custom lowering of vector stores is required for local address space // stores. setOperationAction(ISD::STORE, MVT::v4i32, Custom); @@ -103,6 +148,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : // handle 64-bit stores. setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i8, Expand); setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand); setTruncStoreAction(MVT::v4i64, MVT::v4i1, Expand); @@ -126,6 +173,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::f64, Promote); AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); + setOperationAction(ISD::LOAD, MVT::v2f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v2i64); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); @@ -152,15 +202,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::FNEG, MVT::v2f32, Expand); setOperationAction(ISD::FNEG, MVT::v4f32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::SUB, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); @@ -168,10 +222,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : static const MVT::SimpleValueType IntTypes[] = { MVT::v2i32, MVT::v4i32 }; - const size_t NumIntTypes = array_lengthof(IntTypes); - for (unsigned int x = 0; x < NumIntTypes; ++x) { - MVT::SimpleValueType VT = IntTypes[x]; + for (MVT VT : IntTypes) { //Expand the following operations for the current type by default setOperationAction(ISD::ADD, VT, Expand); setOperationAction(ISD::AND, VT, Expand); @@ -195,12 +247,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : static const MVT::SimpleValueType FloatTypes[] = { MVT::v2f32, MVT::v4f32 }; - const size_t NumFloatTypes = array_lengthof(FloatTypes); - for (unsigned int x = 0; x < NumFloatTypes; ++x) { - MVT::SimpleValueType VT = FloatTypes[x]; + for (MVT VT : FloatTypes) { setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FADD, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); @@ -208,25 +259,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FMUL, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); } - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); + setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SELECT_CC); } //===----------------------------------------------------------------------===// @@ -325,6 +364,25 @@ SDValue AMDGPUTargetLowering::LowerReturn( // Target specific lowering //===---------------------------------------------------------------------===// +SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SDValue Callee = CLI.Callee; + SelectionDAG &DAG = CLI.DAG; + + const Function &Fn = *DAG.getMachineFunction().getFunction(); + + StringRef FuncName("<unknown>"); + + if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee)) + FuncName = G->getSymbol(); + else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + FuncName = G->getGlobal()->getName(); + + DiagnosticInfoUnsupported NoCalls(Fn, "call to function " + FuncName); + DAG.getContext()->diagnose(NoCalls); + return SDValue(); +} + SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -361,12 +419,111 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do // nothing here and let the illegal result integer be handled normally. return; + case ISD::UDIV: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), + N->getOperand(0), N->getOperand(1)); + Results.push_back(UDIVREM); + break; + } + case ISD::UREM: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), + N->getOperand(0), N->getOperand(1)); + Results.push_back(UDIVREM.getValue(1)); + break; + } + case ISD::UDIVREM: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); + + SDValue one = DAG.getConstant(1, HalfVT); + SDValue zero = DAG.getConstant(0, HalfVT); + + //HiLo split + SDValue LHS = N->getOperand(0); + SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero); + SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one); + + SDValue RHS = N->getOperand(1); + SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero); + SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one); + + // Get Speculative values + SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); + SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); + + SDValue REM_Hi = zero; + SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ); + + SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ); + SDValue DIV_Lo = zero; + + const unsigned halfBitWidth = HalfVT.getSizeInBits(); + + for (unsigned i = 0; i < halfBitWidth; ++i) { + SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT); + // Get Value of high bit + SDValue HBit; + if (halfBitWidth == 32 && Subtarget->hasBFE()) { + HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one); + } else { + HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); + HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); + } + + SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo, + DAG.getConstant(halfBitWidth - 1, HalfVT)); + REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one); + REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry); + + REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one); + REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit); + + + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); + + SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT); + SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE); + + DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); + + // Update REM + + SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); + + REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE); + REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero); + REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one); + } + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); + SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi); + Results.push_back(DIV); + Results.push_back(REM); + break; + } default: return; } } +// FIXME: This implements accesses to initialized globals in the constant +// address space by copying them to private and accessing that. It does not +// properly handle illegal types or vectors. The private vector loads are not +// scalarized, and the illegal scalars hit an assertion. This technique will not +// work well with large initializers, and this should eventually be +// removed. Initialized globals should be placed into a data section that the +// runtime will load into a buffer before the kernel is executed. Uses of the +// global need to be replaced with a pointer loaded from an implicit kernel +// argument into this buffer holding the copy of the data, which will remove the +// need for any of this. SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, const GlobalValue *GV, const SDValue &InitPtr, @@ -380,29 +537,60 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr, MachinePointerInfo(UndefValue::get(PtrTy)), false, false, TD->getPrefTypeAlignment(CI->getType())); - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) { + } + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) { EVT VT = EVT::getEVT(CFP->getType()); PointerType *PtrTy = PointerType::get(CFP->getType(), 0); return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr, MachinePointerInfo(UndefValue::get(PtrTy)), false, false, TD->getPrefTypeAlignment(CFP->getType())); - } else if (Init->getType()->isAggregateType()) { + } + + Type *InitTy = Init->getType(); + if (StructType *ST = dyn_cast<StructType>(InitTy)) { + const StructLayout *SL = TD->getStructLayout(ST); + EVT PtrVT = InitPtr.getValueType(); - unsigned NumElements = Init->getType()->getArrayNumElements(); + SmallVector<SDValue, 8> Chains; + + for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) { + SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset); + + Constant *Elt = Init->getAggregateElement(I); + Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG)); + } + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + if (SequentialType *SeqTy = dyn_cast<SequentialType>(InitTy)) { + EVT PtrVT = InitPtr.getValueType(); + + unsigned NumElements; + if (ArrayType *AT = dyn_cast<ArrayType>(SeqTy)) + NumElements = AT->getNumElements(); + else if (VectorType *VT = dyn_cast<VectorType>(SeqTy)) + NumElements = VT->getNumElements(); + else + llvm_unreachable("Unexpected type"); + + unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType()); SmallVector<SDValue, 8> Chains; for (unsigned i = 0; i < NumElements; ++i) { - SDValue Offset = DAG.getConstant(i * TD->getTypeAllocSize( - Init->getType()->getArrayElementType()), PtrVT); + SDValue Offset = DAG.getConstant(i * EltSize, PtrVT); SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset); - Chains.push_back(LowerConstantInitializer(Init->getAggregateElement(i), - GV, Ptr, Chain, DAG)); + + Constant *Elt = Init->getAggregateElement(i); + Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG)); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); - } else { - Init->dump(); - llvm_unreachable("Unhandled constant initializer"); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } + + Init->dump(); + llvm_unreachable("Unhandled constant initializer"); } SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, @@ -440,7 +628,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, unsigned Size = TD->getTypeAllocSize(EltType); unsigned Alignment = TD->getPrefTypeAlignment(EltType); - const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV); + const GlobalVariable *Var = cast<GlobalVariable>(GV); const Constant *Init = Var->getInitializer(); int FI = FrameInfo->CreateStackObject(Size, Alignment, false); SDValue InitPtr = DAG.getFrameIndex(FI, @@ -461,7 +649,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) { Ops.push_back((*I)->getOperand(i)); } - DAG.UpdateNodeOperands(*I, &Ops[0], Ops.size()); + DAG.UpdateNodeOperands(*I, Ops); } return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), getPointerTy(AMDGPUAS::CONSTANT_ADDRESS)); @@ -469,44 +657,28 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, } } -void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &Args, - unsigned Start, - unsigned Count) const { - EVT VT = Op.getValueType(); - for (unsigned i = Start, e = Start + Count; i != e; ++i) { - Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), - VT.getVectorElementType(), - Op, DAG.getConstant(i, MVT::i32))); - } -} - SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SmallVector<SDValue, 8> Args; SDValue A = Op.getOperand(0); SDValue B = Op.getOperand(1); - ExtractVectorElements(A, DAG, Args, 0, - A.getValueType().getVectorNumElements()); - ExtractVectorElements(B, DAG, Args, 0, - B.getValueType().getVectorNumElements()); + DAG.ExtractVectorElements(A, Args); + DAG.ExtractVectorElements(B, Args); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), - &Args[0], Args.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args); } SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SmallVector<SDValue, 8> Args; - EVT VT = Op.getValueType(); unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); - ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, - VT.getVectorNumElements()); + EVT VT = Op.getValueType(); + DAG.ExtractVectorElements(Op.getOperand(0), Args, Start, + VT.getVectorNumElements()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), - &Args[0], Args.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args); } SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, @@ -560,6 +732,22 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_umul24: + return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case AMDGPUIntrinsic::AMDGPU_imul24: + return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case AMDGPUIntrinsic::AMDGPU_umad24: + return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case AMDGPUIntrinsic::AMDGPU_imad24: + return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case AMDGPUIntrinsic::AMDGPU_bfe_i32: return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, Op.getOperand(1), @@ -590,8 +778,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, ///IABS(a) = SMAX(sub(0, a), a) SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, - SelectionDAG &DAG) const { - + SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), @@ -603,7 +790,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, /// Linear Interpolation /// LRP(a, b, c) = muladd(a, b, (1 - a) * c) SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, @@ -617,16 +804,16 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, } /// \brief Generate Min/Max node -SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); +SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + EVT VT = N->getValueType(0); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue True = Op.getOperand(2); - SDValue False = Op.getOperand(3); - SDValue CC = Op.getOperand(4); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue True = N->getOperand(2); + SDValue False = N->getOperand(3); + SDValue CC = N->getOperand(4); if (VT != MVT::f32 || !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { @@ -654,10 +841,8 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, case ISD::SETOLT: case ISD::SETLE: case ISD::SETLT: { - if (LHS == True) - return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); - else - return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); + unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX; + return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETGT: case ISD::SETGE: @@ -665,15 +850,13 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, case ISD::SETOGE: case ISD::SETUGT: case ISD::SETOGT: { - if (LHS == True) - return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); - else - return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); + unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN; + return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETCC_INVALID: llvm_unreachable("Invalid setcc condcode!"); } - return Op; + return SDValue(); } SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, @@ -695,8 +878,7 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, MemEltVT, Load->isVolatile(), Load->isNonTemporal(), Load->getAlignment())); } - return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), - Loads.data(), Loads.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), Loads); } SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, @@ -713,32 +895,46 @@ SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, } SDLoc DL(Op); - const SDValue &Value = Store->getValue(); + SDValue Value = Store->getValue(); EVT VT = Value.getValueType(); - const SDValue &Ptr = Store->getBasePtr(); + EVT ElemVT = VT.getVectorElementType(); + SDValue Ptr = Store->getBasePtr(); EVT MemEltVT = MemVT.getVectorElementType(); unsigned MemEltBits = MemEltVT.getSizeInBits(); unsigned MemNumElements = MemVT.getVectorNumElements(); - EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); - SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, PackedVT); + unsigned PackedSize = MemVT.getStoreSizeInBits(); + SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, MVT::i32); + + assert(Value.getValueType().getScalarSizeInBits() >= 32); SDValue PackedValue; for (unsigned i = 0; i < MemNumElements; ++i) { - EVT ElemVT = VT.getVectorElementType(); SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, DAG.getConstant(i, MVT::i32)); - Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); - Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); - SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); - Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); + Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32); + Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg + + SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32); + Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift); + if (i == 0) { PackedValue = Elt; } else { - PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); + PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt); } } + + if (PackedSize < 32) { + EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize); + return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr, + Store->getMemOperand()->getPointerInfo(), + PackedVT, + Store->isNonTemporal(), Store->isVolatile(), + Store->getAlignment()); + } + return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, - MachinePointerInfo(Store->getMemOperand()->getValue()), + Store->getMemOperand()->getPointerInfo(), Store->isVolatile(), Store->isNonTemporal(), Store->getAlignment()); } @@ -766,7 +962,7 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, MemEltVT, Store->isVolatile(), Store->isNonTemporal(), Store->getAlignment())); } - return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); + return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains); } SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { @@ -788,9 +984,24 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32); } + if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) { + assert(VT == MVT::i1 && "Only i1 non-extloads expected"); + // FIXME: Copied from PPC + // First, load into 32 bits, then truncate to 1 bit. + + SDValue Chain = Load->getChain(); + SDValue BasePtr = Load->getBasePtr(); + MachineMemOperand *MMO = Load->getMemOperand(); + + SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain, + BasePtr, MVT::i8, MMO); + return DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); + } + // Lower loads constant address space global variable loads if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && - isa<GlobalVariable>(GetUnderlyingObject(Load->getPointerInfo().V))) { + isa<GlobalVariable>( + GetUnderlyingObject(Load->getMemOperand()->getValue()))) { SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL, getPointerTy(AMDGPUAS::PRIVATE_ADDRESS)); @@ -887,15 +1098,13 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Num = Op.getOperand(0); SDValue Den = Op.getOperand(1); - SmallVector<SDValue, 8> Results; - // RCP = URECIP(Den) = 2^32 / Den + e // e is rounding error. SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); @@ -985,10 +1194,11 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), Remainder_A_Den, Rem, ISD::SETEQ); - SDValue Ops[2]; - Ops[0] = Div; - Ops[1] = Rem; - return DAG.getMergeValues(Ops, 2, DL); + SDValue Ops[2] = { + Div, + Rem + }; + return DAG.getMergeValues(Ops, DL); } SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, @@ -1029,81 +1239,197 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, MVT VT = Op.getSimpleValueType(); MVT ScalarVT = VT.getScalarType(); - unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits(); - unsigned DestBits = ScalarVT.getSizeInBits(); - unsigned BitsDiff = DestBits - SrcBits; - - if (!Subtarget->hasBFE()) - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + if (!VT.isVector()) + return SDValue(); SDValue Src = Op.getOperand(0); - if (VT.isVector()) { - SDLoc DL(Op); - // Need to scalarize this, and revisit each of the scalars later. - // TODO: Don't scalarize on Evergreen? - unsigned NElts = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Args; - ExtractVectorElements(Src, DAG, Args, 0, NElts); + SDLoc DL(Op); - SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); - for (unsigned I = 0; I < NElts; ++I) - Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); + // TODO: Don't scalarize on Evergreen? + unsigned NElts = VT.getVectorNumElements(); + SmallVector<SDValue, 8> Args; + DAG.ExtractVectorElements(Src, Args, 0, NElts); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size()); - } + SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); + for (unsigned I = 0; I < NElts; ++I) + Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); - if (SrcBits == 32) { - SDLoc DL(Op); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args); +} - // If the source is 32-bits, this is really half of a 2-register pair, and - // we need to discard the unused half of the pair. - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src); - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc); - } +//===----------------------------------------------------------------------===// +// Custom DAG optimizations +//===----------------------------------------------------------------------===// - unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1; +static bool isU24(SDValue Op, SelectionDAG &DAG) { + APInt KnownZero, KnownOne; + EVT VT = Op.getValueType(); + DAG.computeKnownBits(Op, KnownZero, KnownOne); - // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it - // might not be worth the effort, and will need to expand to shifts when - // fixing SGPR copies. - if (SrcBits < 32 && DestBits <= 32) { - SDLoc DL(Op); - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); - - if (DestBits != 32) - Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src); - - // FIXME: This should use TargetConstant, but that hits assertions for - // Evergreen. - SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT, - Op.getOperand(0), // Operand - DAG.getConstant(0, ExtVT), // Offset - DAG.getConstant(SrcBits, ExtVT)); // Width - - // Truncate to the original type if necessary. - if (ScalarVT == MVT::i32) - return Ext; - return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext); - } + return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24; +} - // For small types, extend to 32-bits first. - if (SrcBits < 32) { - SDLoc DL(Op); - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); +static bool isI24(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src); - SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32, - DL, - ExtVT, - TruncSrc, // Operand - DAG.getConstant(0, ExtVT), // Offset - DAG.getConstant(SrcBits, ExtVT)); // Width + // In order for this to be a signed 24-bit value, bit 23, must + // be a sign bit. + return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated + // as unsigned 24-bit values. + (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24; +} + +static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = Op.getValueType(); + + APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24); + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, true, true); + if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + DCI.CommitTargetLoweringOpt(TLO); +} - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32); +template <typename IntTy> +static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, + uint32_t Offset, uint32_t Width) { + if (Width + Offset < 32) { + IntTy Result = (Src0 << (32 - Offset - Width)) >> (32 - Width); + return DAG.getConstant(Result, MVT::i32); } - // For everything else, use the standard bitshift expansion. - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + return DAG.getConstant(Src0 >> Offset, MVT::i32); +} + +SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + + switch(N->getOpcode()) { + default: break; + case ISD::MUL: { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Mul; + + // FIXME: Add support for 24-bit multiply with 64-bit output on SI. + if (VT.isVector() || VT.getSizeInBits() > 32) + break; + + if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { + N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); + N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); + Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1); + } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { + N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); + N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); + Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1); + } else { + break; + } + + // We need to use sext even for MUL_U24, because MUL_U24 is used + // for signed multiply of 8 and 16-bit types. + SDValue Reg = DAG.getSExtOrTrunc(Mul, DL, VT); + + return Reg; + } + case AMDGPUISD::MUL_I24: + case AMDGPUISD::MUL_U24: { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + simplifyI24(N0, DCI); + simplifyI24(N1, DCI); + return SDValue(); + } + case ISD::SELECT_CC: { + return CombineMinMax(N, DAG); + } + case AMDGPUISD::BFE_I32: + case AMDGPUISD::BFE_U32: { + assert(!N->getValueType(0).isVector() && + "Vector handling of BFE not implemented"); + ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); + if (!Width) + break; + + uint32_t WidthVal = Width->getZExtValue() & 0x1f; + if (WidthVal == 0) + return DAG.getConstant(0, MVT::i32); + + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!Offset) + break; + + SDValue BitsFrom = N->getOperand(0); + uint32_t OffsetVal = Offset->getZExtValue() & 0x1f; + + bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32; + + if (OffsetVal == 0) { + // This is already sign / zero extended, so try to fold away extra BFEs. + unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal); + + unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom); + if (OpSignBits >= SignBits) + return BitsFrom; + + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal); + if (Signed) { + // This is a sign_extend_inreg. Replace it to take advantage of existing + // DAG Combines. If not eliminated, we will match back to BFE during + // selection. + + // TODO: The sext_inreg of extended types ends, although we can could + // handle them in a single BFE. + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom, + DAG.getValueType(SmallVT)); + } + + return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT); + } + + if (ConstantSDNode *Val = dyn_cast<ConstantSDNode>(N->getOperand(0))) { + if (Signed) { + return constantFoldBFE<int32_t>(DAG, + Val->getSExtValue(), + OffsetVal, + WidthVal); + } + + return constantFoldBFE<uint32_t>(DAG, + Val->getZExtValue(), + OffsetVal, + WidthVal); + } + + APInt Demanded = APInt::getBitsSet(32, + OffsetVal, + OffsetVal + WidthVal); + + if ((OffsetVal + WidthVal) >= 32) { + SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32); + return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, + BitsFrom, ShiftVal); + } + + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { + DCI.CommitTargetLoweringOpt(TLO); + } + + break; + } + } + return SDValue(); } //===----------------------------------------------------------------------===// @@ -1181,7 +1507,7 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; // AMDIL DAG nodes NODE_NAME_CASE(CALL); NODE_NAME_CASE(UMUL); @@ -1202,6 +1528,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) + NODE_NAME_CASE(MUL_U24) + NODE_NAME_CASE(MUL_I24) + NODE_NAME_CASE(MAD_U24) + NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(URECIP) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(EXPORT) @@ -1219,22 +1549,22 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -static void computeMaskedBitsForMinMax(const SDValue Op0, - const SDValue Op1, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) { +static void computeKnownBitsForMinMax(const SDValue Op0, + const SDValue Op1, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) { APInt Op0Zero, Op0One; APInt Op1Zero, Op1One; - DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth); - DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth); + DAG.computeKnownBits(Op0, Op0Zero, Op0One, Depth); + DAG.computeKnownBits(Op1, Op1Zero, Op1One, Depth); KnownZero = Op0Zero & Op1Zero; KnownOne = Op0One & Op1One; } -void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( +void AMDGPUTargetLowering::computeKnownBitsForTargetNode( const SDValue Op, APInt &KnownZero, APInt &KnownOne, @@ -1242,8 +1572,14 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( unsigned Depth) const { KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything. + + APInt KnownZero2; + APInt KnownOne2; unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: + break; case ISD::INTRINSIC_WO_CHAIN: { // FIXME: The intrinsic should just use the node. switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { @@ -1251,8 +1587,8 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( case AMDGPUIntrinsic::AMDGPU_umax: case AMDGPUIntrinsic::AMDGPU_imin: case AMDGPUIntrinsic::AMDGPU_umin: - computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2), - KnownZero, KnownOne, DAG, Depth); + computeKnownBitsForMinMax(Op.getOperand(1), Op.getOperand(2), + KnownZero, KnownOne, DAG, Depth); break; default: break; @@ -1264,10 +1600,62 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( case AMDGPUISD::UMAX: case AMDGPUISD::SMIN: case AMDGPUISD::UMIN: - computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1), - KnownZero, KnownOne, DAG, Depth); + computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1), + KnownZero, KnownOne, DAG, Depth); break; - default: + + case AMDGPUISD::BFE_I32: + case AMDGPUISD::BFE_U32: { + ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + if (!CWidth) + return; + + unsigned BitWidth = 32; + uint32_t Width = CWidth->getZExtValue() & 0x1f; + if (Width == 0) { + KnownZero = APInt::getAllOnesValue(BitWidth); + KnownOne = APInt::getNullValue(BitWidth); + return; + } + + // FIXME: This could do a lot more. If offset is 0, should be the same as + // sign_extend_inreg implementation, but that involves duplicating it. + if (Opc == AMDGPUISD::BFE_I32) + KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width); + else + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width); + break; } + } +} + +unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, + const SelectionDAG &DAG, + unsigned Depth) const { + switch (Op.getOpcode()) { + case AMDGPUISD::BFE_I32: { + ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + if (!Width) + return 1; + + unsigned SignBits = 32 - Width->getZExtValue() + 1; + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!Offset || !Offset->isNullValue()) + return SignBits; + + // TODO: Could probably figure something out with non-0 offsets. + unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); + return std::max(SignBits, Op0SignBits); + } + + case AMDGPUISD::BFE_U32: { + ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1; + } + + default: + return 1; + } } |