diff options
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 138 |
1 files changed, 107 insertions, 31 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 8b92e70..c32412a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -2142,6 +2143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -2277,6 +2279,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); const Function* Fn = MF.getFunction(); if (Fn->hasExternalLinkage() && @@ -2416,6 +2419,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MFI->CreateFixedObject(1, StackSize, true)); } + MachineModuleInfo &MMI = MF.getMMI(); + const Function *WinEHParent = nullptr; + if (IsWin64 && MMI.hasWinEHFuncInfo(Fn)) + WinEHParent = MMI.getWinEHParent(Fn); + bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn; + bool IsWinEHParent = WinEHParent && WinEHParent == Fn; + // Figure out if XMM registers are in use. assert(!(MF.getTarget().Options.UseSoftFloat && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && @@ -2452,7 +2462,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } if (IsWin64) { - const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); // Get to the caller-allocated home save location. Add 8 to account // for the return address. 
int HomeOffset = TFI.getOffsetOfLocalArea() + 8; @@ -2505,6 +2514,27 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); + } else if (IsWinEHOutlined) { + // Get to the caller-allocated home save location. Add 8 to account + // for the return address. + int HomeOffset = TFI.getOffsetOfLocalArea() + 8; + FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject( + /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false)); + + MMI.getWinEHFuncInfo(Fn) + .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] = + FuncInfo->getRegSaveFrameIndex(); + + // Store the second integer parameter (rdx) into rsp+16 relative to the + // stack pointer at the entry of the function. + SDValue RSFIN = + DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); + unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, RSFIN, + MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()), + /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0); } if (isVarArg && MFI->hasMustTailInVarArgFunc()) { @@ -2571,6 +2601,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setArgumentStackSize(StackSize); + if (IsWinEHParent) { + int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); + MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; + SDValue Neg2 = DAG.getConstant(-2, MVT::i64); + Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, + MachinePointerInfo::getFixedStack(UnwindHelpFI), + /*isVolatile=*/true, + /*isNonTemporal=*/false, /*Alignment=*/0); + } + return Chain; } @@ -4420,6 +4461,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, SDLoc dl(Op); SDValue V; bool First = true; + + // SSE4.1 - use PINSRB to insert each 
byte directly. + if (Subtarget->hasSSE41()) { + for (unsigned i = 0; i < 16; ++i) { + bool isNonZero = (NonZeros & (1 << i)) != 0; + if (isNonZero) { + if (First) { + if (NumZero) + V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); + else + V = DAG.getUNDEF(MVT::v16i8); + First = false; + } + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + MVT::v16i8, V, Op.getOperand(i), + DAG.getIntPtrConstant(i)); + } + } + + return V; + } + + // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. for (unsigned i = 0; i < 16; ++i) { bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; if (ThisIsNonZero && First) { @@ -5650,14 +5714,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } + // We can't directly insert an i8 or i16 into a vector, so zero extend + // it to i32 first. if (ExtVT == MVT::i16 || ExtVT == MVT::i8) { Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); if (VT.is256BitVector()) { - SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); - Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + if (Subtarget->hasAVX()) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); + } else { + // Without AVX, we need to extend to a 128-bit vector and then + // insert into the 256-bit vector. 
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); + SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl); + Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl); + } } else { assert(VT.is128BitVector() && "Expected an SSE value type!"); + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item); Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); } return DAG.getNode(ISD::BITCAST, dl, VT, Item); @@ -5877,7 +5951,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); unsigned NumElems = ResVT.getVectorNumElements(); - if(ResVT.is256BitVector()) + if (ResVT.is256BitVector()) return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); if (Op.getNumOperands() == 4) { @@ -9281,15 +9355,6 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3})) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1); - // If we have a single input to the zero element, insert that into V1 if we - // can do so cheaply. - int NumV2Elements = - std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }); - if (NumV2Elements == 1 && Mask[0] >= 4) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( - DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) - return Insertion; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9432,15 +9497,6 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef<int> Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - // If we have a single input to the zero element, insert that into V1 if we - // can do so cheaply. 
- int NumV2Elements = - std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; }); - if (NumV2Elements == 1 && Mask[0] >= 8) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( - DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) - return Insertion; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Blend; @@ -9811,6 +9867,18 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); ArrayRef<int> Mask = SVOp->getMask(); + // If we have a single input to the zero element, insert that into V1 if we + // can do so cheaply. + int NumElts = VT.getVectorNumElements(); + int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) { + return M >= NumElts; + }); + + if (NumV2Elements == 1 && Mask[0] >= NumElts) + if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + DL, VT, V1, V2, Mask, Subtarget, DAG)) + return Insertion; + // There is a really nice hard cut-over between AVX1 and AVX2 that means we can // check for those subtargets here and avoid much of the subtarget querying in // the per-vector-type lowering routines. 
With AVX1 we have essentially *zero* @@ -11903,7 +11971,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, // Now we have only mask extension assert(InVT.getVectorElementType() == MVT::i1); SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType()); - const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue(); + const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, @@ -11979,7 +12047,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { } SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType()); - const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue(); + const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue(); SDValue CP = DAG.getConstantPool(C, getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP, @@ -12750,6 +12818,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, return SDValue(); } +/// If we have at least two divisions that use the same divisor, convert to +/// multiplication by a reciprocal. This may need to be adjusted for a given +/// CPU if a division's cost is not at least twice the cost of a multiplication. +/// This is because we still need one division to calculate the reciprocal and +/// then we need two multiplies by that reciprocal as replacements for the +/// original divisions. 
+bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { + return NumUsers > 1; +} + static bool isAllOnes(SDValue V) { ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); return C && C->isAllOnesValue(); @@ -14427,7 +14505,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, - false, + false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } @@ -15220,10 +15298,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, } case PREFETCH: { SDValue Hint = Op.getOperand(6); - unsigned HintVal; - if (dyn_cast<ConstantSDNode> (Hint) == nullptr || - (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1) - llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1"); + unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue(); + assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1"); unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0); SDValue Chain = Op.getOperand(0); SDValue Mask = Op.getOperand(2); @@ -24175,7 +24251,7 @@ TargetLowering::ConstraintWeight break; case 'G': case 'C': - if (dyn_cast<ConstantFP>(CallOperandVal)) { + if (isa<ConstantFP>(CallOperandVal)) { weight = CW_Constant; } break; |