diff options
Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
| -rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 308 |
1 files changed, 227 insertions, 81 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c6c1f5b..477b5f4 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -43,7 +43,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/VectorExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" @@ -262,7 +261,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { // Double-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 2 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); @@ -387,8 +386,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Long long helper functions // RTABI chapter 4.2, Table 9 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); @@ -404,21 +401,28 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); + setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); + setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); // Memory operations // RTABI chapter 4.3.4 setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); + setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); } // Use divmod compiler-rt calls for iOS 5.0 and later. @@ -529,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::v1i64, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Expand); // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with - // a destination type that is wider than the source. + // a destination type that is wider than the source, and nor does + // it have a FP_TO_[SU]INT instruction with a narrower destination than + // source. setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -551,7 +559,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); + // It is legal to extload from v4i8 to v4i16 or v4i32. + MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, + MVT::v4i16, MVT::v2i16, + MVT::v2i32}; + for (unsigned i = 0; i < 6; ++i) { + setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); + setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); + setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); + } } computeRegisterProperties(); @@ -643,10 +659,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); + + if (!Subtarget->isTargetDarwin()) { + // Non-Darwin platforms may return values in these registers via the + // personality function. + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setExceptionPointerRegister(ARM::R0); + setExceptionSelectorRegister(ARM::R1); + } setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use @@ -773,10 +794,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::MUL); - if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) - setTargetDAGCombine(ISD::OR); - if (Subtarget->hasNEON()) + if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) { setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); + setTargetDAGCombine(ISD::XOR); + } + + if (Subtarget->hasV6Ops()) + setTargetDAGCombine(ISD::SRL); setStackPointerRegisterToSaveRestore(ARM::SP); @@ -869,7 +894,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::CAND: return "ARMISD::CAND"; + case ARMISD::COR: return "ARMISD::COR"; + case ARMISD::CXOR: return "ARMISD::CXOR"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -990,7 +1019,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const { /// getRegClassFor - Return the register class that should be used for the /// specified value type. -TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { +const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { // Map v4i64 to QQ registers but do not make the type legal. Similarly map // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to // load / store 4 to 8 consecutive D registers. @@ -1128,7 +1157,9 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); } case CallingConv::ARM_AAPCS_VFP: - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + if (!isVarArg) + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + // Fallthrough case CallingConv::ARM_AAPCS: return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); case CallingConv::ARM_APCS: @@ -1255,7 +1286,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, + bool doesNotRet, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -1551,12 +1582,20 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; + else if (doesNotRet && isDirect && !isARMFunc && + Subtarget->hasRAS() && !Subtarget->isThumb1Only()) + // "mov lr, pc; b _foo" to avoid confusing the RSP + CallOpc = ARMISD::CALL_NOLINK; else CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; } else { - CallOpc = (isDirect || Subtarget->hasV5TOps()) - ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) - : ARMISD::CALL_NOLINK; + if (!isDirect && !Subtarget->hasV5TOps()) { + CallOpc = ARMISD::CALL_NOLINK; + } else if (doesNotRet && isDirect && Subtarget->hasRAS()) + // "mov lr, pc; b _foo" to avoid confusing the RSP + CallOpc = ARMISD::CALL_NOLINK; + else + CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; } std::vector<SDValue> Ops; @@ -1569,6 +1608,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + if (InFlag.getNode()) Ops.push_back(InFlag); @@ -1897,7 +1942,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { return false; unsigned NumCopies = 0; - SDNode* Copies[2]; + SDNode* Copies[2] = { 0, 0 }; SDNode *Use = *N->use_begin(); if (Use->getOpcode() == ISD::CopyToReg) { Copies[NumCopies++] = Use; @@ -1932,7 +1977,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { UI != UE; ++UI) { if (UI->getOpcode() == ISD::CopyToReg) { SDNode *Use = *UI; - if (Use == Copies[0] || Use == Copies[1]) + if (Use == Copies[0] || ((NumCopies == 2) && (Use == Copies[1]))) continue; return false; } @@ -2043,7 +2088,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, std::pair<SDValue, SDValue> CallResult = LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, - 0, CallingConv::C, false, /*isReturnValueUsed=*/true, + 0, CallingConv::C, /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -2167,7 +2213,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - // FIXME: Enable this for static codegen when tool issues are fixed. + // FIXME: Enable this for static codegen when tool issues are fixed. Also + // update ARMFastISel::ARMMaterializeGV. if (Subtarget->useMovt() && RelocM != Reloc::Static) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register @@ -2398,7 +2445,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = ARM::tGPRRegisterClass; else @@ -2484,7 +2531,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, SmallVector<SDValue, 4> MemOps; for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (AFI->isThumb1OnlyFunction()) RC = ARM::tGPRRegisterClass; else @@ -2567,7 +2614,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); } else { - TargetRegisterClass *RC; + const TargetRegisterClass *RC; if (RegVT == MVT::f32) RC = ARM::SPRRegisterClass; @@ -2809,6 +2856,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } + // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the + // undefined bits before doing a full-word comparison with zero. + Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond, + DAG.getConstant(1, Cond.getValueType())); + return DAG.getSelectCC(dl, Cond, DAG.getConstant(0, Cond.getValueType()), SelectTrue, SelectFalse, ISD::SETNE); @@ -2926,12 +2978,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); DebugLoc dl = Op.getDebugLoc(); - bool SeenZero = false; - if (canChangeToInt(LHS, SeenZero, Subtarget) && - canChangeToInt(RHS, SeenZero, Subtarget) && - // If one of the operand is zero, it's safe to ignore the NaN case since - // we only care about equality comparisons. - (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { + bool LHSSeenZero = false; + bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); + bool RHSSeenZero = false; + bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); + if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { // If unsafe fp math optimization is enabled and there are no other uses of // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. @@ -2940,10 +2991,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { else if (CC == ISD::SETUNE) CC = ISD::SETNE; + SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); SDValue ARMcc; if (LHS.getValueType() == MVT::f32) { - LHS = bitcastf32Toi32(LHS, DAG); - RHS = bitcastf32Toi32(RHS, DAG); + LHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(LHS, DAG), Mask); + RHS = DAG.getNode(ISD::AND, dl, MVT::i32, + bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, @@ -2954,6 +3008,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue RHS1, RHS2; expandf64Toi32(LHS, DAG, LHS1, LHS2); expandf64Toi32(RHS, DAG, RHS1, RHS2); + LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); + RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); @@ -3047,11 +3103,21 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - assert(VT.getVectorElementType() == MVT::i32 && "Unexpected custom lowering"); + DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) - return Op; - return DAG.UnrollVectorOp(Op.getNode()); + if (Op.getValueType().getVectorElementType() == MVT::i32) { + if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) + return Op; + return DAG.UnrollVectorOp(Op.getNode()); + } + + assert(Op.getOperand(0).getValueType() == MVT::v4f32 && + "Invalid type for custom lowering!"); + if (VT != MVT::v4i16) + return DAG.UnrollVectorOp(Op.getNode()); + + Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { @@ -3063,8 +3129,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::FP_TO_SINT: Opc = ARMISD::FTOSI; break; @@ -3094,8 +3159,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { unsigned CastOpc; unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: CastOpc = ISD::SIGN_EXTEND; Opc = ISD::SINT_TO_FP; @@ -3119,8 +3183,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { unsigned Opc; switch (Op.getOpcode()) { - default: - assert(0 && "Invalid opcode!"); + default: llvm_unreachable("Invalid opcode!"); case ISD::SINT_TO_FP: Opc = ARMISD::SITOF; break; @@ -3494,7 +3557,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { if (Op.getOperand(1).getValueType().isFloatingPoint()) { switch (SetCCOpcode) { - default: llvm_unreachable("Illegal FP comparison"); break; + default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: case ISD::SETNE: Invert = true; // Fallthrough case ISD::SETOEQ: @@ -3533,7 +3596,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { } else { // Integer comparisons. switch (SetCCOpcode) { - default: llvm_unreachable("Illegal integer comparison"); break; + default: llvm_unreachable("Illegal integer comparison"); case ISD::SETNE: Invert = true; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETLT: Swap = true; @@ -3740,14 +3803,13 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, default: llvm_unreachable("unexpected size for isNEONModifiedImm"); - return SDValue(); } unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); return DAG.getTargetConstant(EncodedVal, MVT::i32); } -static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, +static bool isVEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseVEXT, unsigned &Imm) { unsigned NumElts = VT.getVectorNumElements(); ReverseVEXT = false; @@ -3786,8 +3848,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, /// isVREVMask - Check if a vector shuffle corresponds to a VREV /// instruction with the specified blocksize. (The order of the elements /// within each block of the vector is reversed.) -static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned BlockSize) { +static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); @@ -3813,15 +3874,14 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { +static bool isVTBLMask(ArrayRef<int> M, EVT VT) { // We can handle <8 x i8> vector shuffles. If the index in the mask is out of // range, then 0 is placed into the resulting vector. So pretty much any mask // of 8 elements can work here. return VT == MVT::v8i8 && M.size() == 8; } -static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3839,8 +3899,7 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. -static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3855,8 +3914,7 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3879,8 +3937,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, -static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3904,8 +3961,7 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, return true; } -static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -3930,8 +3986,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. -static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, - unsigned &WhichResult) { +static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; @@ -4363,7 +4418,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, } static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, - SmallVectorImpl<int> &ShuffleMask, + ArrayRef<int> ShuffleMask, SelectionDAG &DAG) { // Check to see if we can use the VTBL instruction. SDValue V1 = Op.getOperand(0); @@ -4371,7 +4426,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, DebugLoc DL = Op.getDebugLoc(); SmallVector<SDValue, 8> VTBLMask; - for (SmallVectorImpl<int>::iterator + for (ArrayRef<int>::iterator I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); @@ -4391,7 +4446,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); - SmallVector<int, 8> ShuffleMask; // Convert shuffles that are directly supported on NEON to target-specific // DAG nodes, instead of keeping them as shuffles and matching them again @@ -4399,7 +4453,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of inconsistencies between legalization and selection. // FIXME: floating-point vectors should be canonicalized to integer vectors // of the same time so that they get CSEd properly. - SVN->getMask(ShuffleMask); + ArrayRef<int> ShuffleMask = SVN->getMask(); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); if (EltSize <= 32) { @@ -4959,7 +5013,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { unsigned Opc; bool ExtraOp = false; switch (Op.getOpcode()) { - default: assert(0 && "Invalid code"); + default: llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = ARMISD::ADDC; break; case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; case ISD::SUBC: Opc = ARMISD::SUBC; break; @@ -5071,7 +5125,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); } - return SDValue(); } /// ReplaceNodeResults - Replace the results of node with an illegal result @@ -5083,7 +5136,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); - break; case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; @@ -5279,7 +5331,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); @@ -5389,7 +5441,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned scratch = MRI.createVirtualRegister(TRC); unsigned scratch2 = MRI.createVirtualRegister(TRC); @@ -5499,7 +5551,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); - TargetRegisterClass *TRC = + const TargetRegisterClass *TRC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned storesuccess = MRI.createVirtualRegister(TRC); @@ -5792,7 +5844,12 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); - BuildMI(DispatchBB, dl, TII->get(ARM::eh_sjlj_dispatchsetup)); + if (AFI->isThumb1OnlyFunction()) + BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup)); + else if (!Subtarget->hasVFP2()) + BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp)); + else + BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); unsigned NumLPads = LPadList.size(); if (Subtarget->isThumb2()) { @@ -6014,7 +6071,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // N.B. the order the invoke BBs are processed in doesn't matter here. const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII); const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); - const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF); + const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; for (SmallPtrSet<MachineBasicBlock*, 64>::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -6666,7 +6723,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break; case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break; default: - assert(0 && "Invalid vector element type for padd optimization."); + llvm_unreachable("Invalid vector element type for padd optimization."); } SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), @@ -6818,8 +6875,52 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); } +static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) { + if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse()) + return false; + + SDValue FalseVal = N.getOperand(0); + ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal); + if (!C) + return false; + if (AllOnes) + return C->isAllOnesValue(); + return C->isNullValue(); +} + +/// formConditionalOp - Combine an operation with a conditional move operand +/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y) +/// (and x, (cmov -1, y, cond)) => (and.cond, x, y) +static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG, + bool Commutable) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + bool isAND = N->getOpcode() == ISD::AND; + bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND); + if (!isCand && Commutable) { + isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND); + if (isCand) + std::swap(N0, N1); + } + if (!isCand) + return SDValue(); + + unsigned Opc = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected node"); + case ISD::AND: Opc = ARMISD::CAND; break; + case ISD::OR: Opc = ARMISD::COR; break; + case ISD::XOR: Opc = ARMISD::CXOR; break; + } + return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0, + N1.getOperand(1), N1.getOperand(2), N1.getOperand(3), + N1.getOperand(4)); +} + static SDValue PerformANDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); @@ -6850,6 +6951,13 @@ static SDValue PerformANDCombine(SDNode *N, } } + if (!Subtarget->isThumb1Only()) { + // (and x, (cmov -1, y, cond)) => (and.cond x, y) + SDValue CAND = formConditionalOp(N, DAG, true); + if (CAND.getNode()) + return CAND; + } + return SDValue(); } @@ -6886,6 +6994,13 @@ static SDValue PerformORCombine(SDNode *N, } } + if (!Subtarget->isThumb1Only()) { + // (or x, (cmov 0, y, cond)) => (or.cond x, y) + SDValue COR = formConditionalOp(N, DAG, true); + if (COR.getNode()) + return COR; + } + SDValue N0 = N->getOperand(0); if (N0.getOpcode() != ISD::AND) return SDValue(); @@ -7034,6 +7149,25 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); } +static SDValue PerformXORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + if (!Subtarget->isThumb1Only()) { + // (xor x, (cmov 0, y, cond)) => (xor.cond x, y) + SDValue CXOR = formConditionalOp(N, DAG, true); + if (CXOR.getNode()) + return CXOR; + } + + return SDValue(); +} + /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff /// the bits being cleared by the AND are not demanded by the BFI. static SDValue PerformBFICombine(SDNode *N, @@ -7331,7 +7465,7 @@ static SDValue CombineBaseUpdate(SDNode *N, if (isIntrinsic) { unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); switch (IntNo) { - default: assert(0 && "unexpected intrinsic for Neon base update"); + default: llvm_unreachable("unexpected intrinsic for Neon base update"); case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; NumVecs = 1; break; case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; @@ -7364,7 +7498,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } else { isLaneOp = true; switch (N->getOpcode()) { - default: assert(0 && "unexpected opcode for Neon base update"); + default: llvm_unreachable("unexpected opcode for Neon base update"); case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; @@ -7857,6 +7991,18 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); + if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { + // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high + // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. + SDValue N1 = N->getOperand(1); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + SDValue N0 = N->getOperand(0); + if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && + DAG.MaskedValueIsZero(N0.getOperand(0), + APInt::getHighBitsSet(32, 16))) + return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); + } + } // Nothing to be done for scalar shifts. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -8085,7 +8231,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SUB: return PerformSUBCombine(N, DCI); case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); case ISD::OR: return PerformORCombine(N, DCI, Subtarget); - case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); + case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); @@ -8377,7 +8524,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, if (Scale & 1) return false; return isPowerOf2_32(Scale); } - break; } return true; } |
