Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
| -rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 351 |
1 file changed, 218 insertions(+), 133 deletions(-)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 34023af..c6c1f5b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -40,7 +40,6 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Target/TargetOptions.h"
@@ -73,7 +72,7 @@ ARMInterworking("arm-interworking", cl::Hidden,
   cl::desc("Enable / disable ARM interworking (for debugging only)"),
   cl::init(true));
 
-namespace llvm {
+namespace {
   class ARMCCState : public CCState {
   public:
     ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -108,8 +107,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
   EVT ElemTy = VT.getVectorElementType();
   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
     setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
+  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
-  if (ElemTy != MVT::i32) {
+  if (ElemTy == MVT::i32) {
+    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+  } else {
     setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
     setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
     setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
@@ -121,18 +126,12 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
   setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
   setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
   if (VT.isInteger()) {
     setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
     setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
     setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
-    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
-    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
-    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
-      setTruncStoreAction(VT.getSimpleVT(),
-                          (MVT::SimpleValueType)InnerVT, Expand);
   }
-  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
 
   // Promote all bit-wise operations.
   if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -433,7 +432,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
   else
     addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
-  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+      !Subtarget->isThumb1Only()) {
     addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
     if (!Subtarget->isFPOnlySP())
       addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -441,6 +441,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   }
 
+  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+      setTruncStoreAction((MVT::SimpleValueType)VT,
+                          (MVT::SimpleValueType)InnerVT, Expand);
+    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+  }
+
   if (Subtarget->hasNEON()) {
     addDRTypeForNEON(MVT::v2f32);
     addDRTypeForNEON(MVT::v8i8);
@@ -457,13 +468,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
     // neither Neon nor VFP support any arithmetic operations on it.
+    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
+    // supported for v4f32.
     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+    // FIXME: Code duplication: FDIV and FREM are expanded always, see
+    // ARMTargetLowering::addTypeForNEON method for details.
     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+    // FIXME: Create unittest.
+    // In another words, find a way when "copysign" appears in DAG with vector
+    // operands.
     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+    // FIXME: Code duplication: SETCC has custom operation action, see
+    // ARMTargetLowering::addTypeForNEON method for details.
     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+    // FIXME: Create unittest for FNEG and for FABS.
     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -476,13 +497,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
-
-    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
+    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
 
     // Neon does not support some operations on v1i64 and v2i64 types.
     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -578,6 +609,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
 
+  // These just redirect to CTTZ and CTLZ on ARM.
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
+
   // Only ARMv6 has BSWAP.
   if (!Subtarget->hasV6Ops())
     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -666,7 +701,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   }
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
-  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+      !Subtarget->isThumb1Only()) {
     // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
     // iff target supports vfp2.
     setOperationAction(ISD::BITCAST, MVT::i64, Custom);
@@ -678,7 +714,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   if (Subtarget->isTargetDarwin()) {
     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
-    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
   }
 
@@ -705,7 +740,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FCOS, MVT::f64, Expand);
   setOperationAction(ISD::FREM, MVT::f64, Expand);
   setOperationAction(ISD::FREM, MVT::f32, Expand);
-  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+      !Subtarget->isThumb1Only()) {
     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
   }
@@ -716,7 +752,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FMA, MVT::f32, Expand);
 
   // Various VFP goodness
-  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
     if (Subtarget->hasVFP2()) {
       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -744,13 +780,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
   setStackPointerRegisterToSaveRestore(ARM::SP);
 
-  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+  if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+      !Subtarget->hasVFP2())
     setSchedulingPreference(Sched::RegPressure);
   else
     setSchedulingPreference(Sched::Hybrid);
 
   //// temporary - rewrite interface to use type
   maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+  maxStoresPerMemset = 16;
+  maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
 
   // On ARM arguments smaller than 4 bytes are extended, so all arguments
   // are at least 4 bytes aligned.
@@ -853,7 +892,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 
   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
-  case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
 
   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
 
@@ -901,6 +939,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
+  case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
   case ARMISD::VDUP:          return "ARMISD::VDUP";
   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
   case ARMISD::VEXT:          return "ARMISD::VEXT";
@@ -986,7 +1025,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
     if (VT == MVT::Glue || VT == MVT::Other)
       continue;
     if (VT.isFloatingPoint() || VT.isVector())
-      return Sched::Latency;
+      return Sched::ILP;
   }
 
   if (!N->isMachineOpcode())
@@ -1001,7 +1040,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
     return Sched::RegPressure;
   if (!Itins->isEmpty() &&
       Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
-    return Sched::Latency;
+    return Sched::ILP;
 
   return Sched::RegPressure;
 }
@@ -1083,7 +1122,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
   if (!Subtarget->isAAPCS_ABI())
     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
   else if (Subtarget->hasVFP2() &&
-           FloatABIType == FloatABI::Hard && !isVarArg)
+           getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+           !isVarArg)
     return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
 }
@@ -1334,7 +1374,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                      MachinePointerInfo(),
-                                     false, false, 0);
+                                     false, false, false, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(j, Load));
         }
@@ -1350,12 +1390,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                            MVT::i32);
-        // TODO: Disable AlwaysInline when it becomes possible
-        // to emit a nested call sequence.
         MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                                             Flags.getByValAlign(),
                                             /*isVolatile=*/false,
-                                            /*AlwaysInline=*/true,
+                                            /*AlwaysInline=*/false,
                                             MachinePointerInfo(0),
                                             MachinePointerInfo(0)));
@@ -1429,7 +1467,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
                            MachinePointerInfo::getConstantPool(),
-                           false, false, 0);
+                           false, false, false, 0);
     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
       const char *Sym = S->getSymbol();
@@ -1444,7 +1482,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
                            MachinePointerInfo::getConstantPool(),
-                           false, false, 0);
+                           false, false, false, 0);
     }
   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
     const GlobalValue *GV = G->getGlobal();
@@ -1465,7 +1503,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
                            MachinePointerInfo::getConstantPool(),
-                           false, false, 0);
+                           false, false, false, 0);
       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                            getPointerTy(), Callee, PICLabel);
@@ -1494,7 +1532,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       Callee = DAG.getLoad(getPointerTy(), dl,
                            DAG.getEntryNode(), CPAddr,
                            MachinePointerInfo::getConstantPool(),
-                           false, false, 0);
+                           false, false, false, 0);
       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                            getPointerTy(), Callee, PICLabel);
@@ -1965,7 +2003,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                                MachinePointerInfo::getConstantPool(),
-                               false, false, 0);
+                               false, false, false, 0);
   if (RelocM == Reloc::Static)
     return Result;
   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1989,7 +2027,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                          MachinePointerInfo::getConstantPool(),
-                         false, false, 0);
+                         false, false, false, 0);
   SDValue Chain = Argument.getValue(1);
 
   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2037,7 +2075,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                          MachinePointerInfo::getConstantPool(),
-                         false, false, 0);
+                         false, false, false, 0);
     Chain = Offset.getValue(1);
 
     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2045,7 +2083,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
 
     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                          MachinePointerInfo::getConstantPool(),
-                         false, false, 0);
+                         false, false, false, 0);
   } else {
     // local exec model
     ARMConstantPoolValue *CPV =
@@ -2054,7 +2092,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                          MachinePointerInfo::getConstantPool(),
-                         false, false, 0);
+                         false, false, false, 0);
   }
 
   // The address of the thread local variable is the add of the thread
@@ -2092,21 +2130,20 @@ SDValue
 ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                                  MachinePointerInfo::getConstantPool(),
-                                 false, false, 0);
+                                 false, false, false, 0);
     SDValue Chain = Result.getValue(1);
     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
     if (!UseGOTOFF)
       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
-                           MachinePointerInfo::getGOT(), false, false, 0);
+                           MachinePointerInfo::getGOT(),
+                           false, false, false, 0);
     return Result;
   }
 
   // If we have T2 ops, we can materialize the address directly via movt/movw
-  // pair. This is always cheaper in terms of performance, but uses at least 2
-  // extra bytes.
-  if (Subtarget->useMovt() &&
-      !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) {
+  // pair. This is always cheaper.
+  if (Subtarget->useMovt()) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -2117,7 +2154,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                        MachinePointerInfo::getConstantPool(),
-                       false, false, 0);
+                       false, false, false, 0);
   }
 }
@@ -2131,8 +2168,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 
   // FIXME: Enable this for static codegen when tool issues are fixed.
-  if (Subtarget->useMovt() && RelocM != Reloc::Static &&
-      !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) {
+  if (Subtarget->useMovt() && RelocM != Reloc::Static) {
     ++NumMovwMovt;
     // FIXME: Once remat is capable of dealing with instructions with register
     // operands, expand this into two nodes.
@@ -2146,7 +2182,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                            DAG.getTargetGlobalAddress(GV, dl, PtrVT));
     if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
       Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
-                           MachinePointerInfo::getGOT(), false, false, 0);
+                           MachinePointerInfo::getGOT(),
+                           false, false, false, 0);
     return Result;
   }
 
@@ -2166,7 +2203,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
 
   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                                MachinePointerInfo::getConstantPool(),
-                               false, false, 0);
+                               false, false, false, 0);
   SDValue Chain = Result.getValue(1);
 
   if (RelocM == Reloc::PIC_) {
@@ -2176,7 +2213,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
     Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                          MachinePointerInfo::getGOT(),
-                         false, false, 0);
+                         false, false, false, 0);
 
   return Result;
 }
@@ -2198,20 +2235,12 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                                MachinePointerInfo::getConstantPool(),
-                               false, false, 0);
+                               false, false, false, 0);
   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
 }
 
 SDValue
-ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
-  const {
-  DebugLoc dl = Op.getDebugLoc();
-  return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
-                     Op.getOperand(0), Op.getOperand(1));
-}
-
-SDValue
 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
   SDValue Val = DAG.getConstant(0, MVT::i32);
@@ -2256,7 +2285,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                                  MachinePointerInfo::getConstantPool(),
-                                 false, false, 0);
+                                 false, false, false, 0);
 
     if (RelocM == Reloc::PIC_) {
       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2388,7 +2417,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
                             MachinePointerInfo::getFixedStack(FI),
-                            false, false, 0);
+                            false, false, false, 0);
   } else {
     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -2524,7 +2553,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
           SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
           ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                   MachinePointerInfo::getFixedStack(FI),
-                                  false, false, 0);
+                                  false, false, false, 0);
         } else {
           ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
                                            Chain, DAG, dl);
@@ -2615,7 +2644,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
             InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                          MachinePointerInfo::getFixedStack(FI),
-                                         false, false, 0));
+                                         false, false, false, 0));
           }
           lastInsIndex = index;
         }
@@ -2850,7 +2879,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
     return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
                        Ld->isVolatile(), Ld->isNonTemporal(),
-                       Ld->getAlignment());
+                       Ld->isInvariant(), Ld->getAlignment());
 
   llvm_unreachable("Unknown VFP cmp argument!");
 }
@@ -2869,7 +2898,7 @@ static void
 expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                        Ld->getChain(), Ptr,
                        Ld->getPointerInfo(), Ld->isVolatile(),
                        Ld->isNonTemporal(),
-                       Ld->getAlignment());
+                       Ld->isInvariant(), Ld->getAlignment());
 
     EVT PtrType = Ptr.getValueType();
     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
@@ -2879,7 +2908,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                        Ld->getChain(), NewPtr,
                        Ld->getPointerInfo().getWithOffset(4),
                        Ld->isVolatile(), Ld->isNonTemporal(),
-                       NewAlign);
+                       Ld->isInvariant(), NewAlign);
     return;
   }
 
@@ -2953,7 +2982,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 
   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
 
-  if (UnsafeFPMath &&
+  if (getTargetMachine().Options.UnsafeFPMath &&
       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
        CC == ISD::SETNE || CC == ISD::SETUNE)) {
     SDValue Result = OptimizeVFPBrcond(Op, DAG);
@@ -3003,19 +3032,33 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
                        MachinePointerInfo::getJumpTable(),
-                       false, false, 0);
+                       false, false, false, 0);
     Chain = Addr.getValue(1);
     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   } else {
     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
-                       MachinePointerInfo::getJumpTable(), false, false, 0);
+                       MachinePointerInfo::getJumpTable(),
+                       false, false, false, 0);
     Chain = Addr.getValue(1);
     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   }
 }
 
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  assert(VT.getVectorElementType() == MVT::i32 && "Unexpected custom lowering");
+
+  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
+    return Op;
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  if (VT.isVector())
+    return LowerVectorFP_TO_INT(Op, DAG);
+
   DebugLoc dl = Op.getDebugLoc();
   unsigned Opc;
 
@@ -3037,6 +3080,12 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();
 
+  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
+    if (VT.getVectorElementType() == MVT::f32)
+      return Op;
+    return DAG.UnrollVectorOp(Op.getNode());
+  }
+
   assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
          "Invalid type for custom lowering!");
   if (VT != MVT::v4f32)
@@ -3179,7 +3228,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
     SDValue Offset = DAG.getConstant(4, MVT::i32);
     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
-                       MachinePointerInfo(), false, false, 0);
+                       MachinePointerInfo(), false, false, false, 0);
   }
 
   // Return LR, which contains the return address. Mark it an implicit live-in.
@@ -3200,7 +3249,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   while (Depth--)
     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                             MachinePointerInfo(),
-                            false, false, 0);
+                            false, false, false, 0);
   return FrameAddr;
 }
@@ -3958,6 +4007,15 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
       }
+
+      // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
+      if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+        int ImmVal = ARM_AM::getFP32Imm(SplatBits);
+        if (ImmVal != -1) {
+          SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+          return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
+        }
+      }
     }
   }
@@ -4350,9 +4408,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
 
       // If this is undef splat, generate it via "just" vdup, if possible.
       if (Lane == -1) Lane = 0;
 
+      // Test if V1 is a SCALAR_TO_VECTOR.
       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
       }
+      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+      // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+      // reaches it).
+      if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+          !isa<ConstantSDNode>(V1.getOperand(0))) {
+        bool IsScalarToVector = true;
+        for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+          if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+            IsScalarToVector = false;
+            break;
+          }
+        if (IsScalarToVector)
+          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+      }
       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
                          DAG.getConstant(Lane, MVT::i32));
     }
@@ -4453,6 +4526,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
 
   return SDValue();
 }
 
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  // INSERT_VECTOR_ELT is legal only for immediate indexes.
+  SDValue Lane = Op.getOperand(2);
+  if (!isa<ConstantSDNode>(Lane))
+    return SDValue();
+
+  return Op;
+}
+
 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
   SDValue Lane = Op.getOperand(1);
@@ -4571,7 +4653,8 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
     return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
                        LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
-                       LD->isNonTemporal(), LD->getAlignment());
+                       LD->isNonTemporal(), LD->isInvariant(),
+                       LD->getAlignment());
 
   // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
   // have been legalized as a BITCAST from v4i32.
   if (N->getOpcode() == ISD::BITCAST) {
@@ -4961,7 +5044,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
-  case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                                Subtarget);
   case ISD::BITCAST:   return ExpandBITCAST(Op.getNode(), DAG);
@@ -4975,6 +5057,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SETCC:         return LowerVSETCC(Op, DAG);
   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
   case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
@@ -5502,52 +5585,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
   return BB;
 }
 
-/// EmitBasePointerRecalculation - For functions using a base pointer, we
-/// rematerialize it (via the frame pointer).
-void ARMTargetLowering::
-EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
-                             MachineBasicBlock *DispatchBB) const {
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
-  MachineFunction &MF = *MI->getParent()->getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
-
-  if (!RI.hasBasePointer(MF)) return;
-
-  MachineBasicBlock::iterator MBBI = MI;
-
-  int32_t NumBytes = AFI->getFramePtrSpillOffset();
-  unsigned FramePtr = RI.getFrameRegister(MF);
-  assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
-         "Base pointer without frame pointer?");
-
-  if (AFI->isThumb2Function())
-    llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
-                                 FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
-  else if (AFI->isThumbFunction())
-    llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
-                                    FramePtr, -NumBytes, *AII, RI);
-  else
-    llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
-                                  FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
-
-  if (!RI.needsStackRealignment(MF)) return;
-
-  // If there's dynamic realignment, adjust for it.
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  unsigned MaxAlign = MFI->getMaxAlignment();
-  assert(!AFI->isThumb1OnlyFunction());
-
-  // Emit bic r6, r6, MaxAlign
-  unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
-  AddDefaultCC(
-    AddDefaultPred(
-      BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
-      .addReg(ARM::R6, RegState::Kill)
-      .addImm(MaxAlign - 1)));
-}
-
 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
 /// registers the function context.
 void ARMTargetLowering::
@@ -5582,8 +5619,6 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
     MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                              MachineMemOperand::MOStore, 4, 4);
 
-  EmitBasePointerRecalculation(MI, MBB, DispatchBB);
-
   // Load the address of the dispatch MBB into the jump buffer.
   if (isThumb2) {
     // Incoming value: jbuf
@@ -5757,6 +5792,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
                              MachineMemOperand::MOLoad |
                              MachineMemOperand::MOVolatile, 4, 4);
 
+  BuildMI(DispatchBB, dl, TII->get(ARM::eh_sjlj_dispatchsetup));
+
   unsigned NumLPads = LPadList.size();
   if (Subtarget->isThumb2()) {
     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
@@ -5924,7 +5961,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
       unsigned VReg1 = MRI->createVirtualRegister(TRC);
       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
                      .addReg(VReg1, RegState::Define)
-                     .addConstantPoolIndex(Idx));
+                     .addConstantPoolIndex(Idx)
+                     .addImm(0));
       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
                      .addReg(NewVReg1)
                      .addReg(VReg1, RegState::Kill));
@@ -5984,9 +6022,10 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
 
   // Remove the landing pad successor from the invoke block and replace it
   // with the new dispatch block.
-  for (MachineBasicBlock::succ_iterator
-         SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
-    MachineBasicBlock *SMBB = *SI;
+  SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
+                                                BB->succ_end());
+  while (!Successors.empty()) {
+    MachineBasicBlock *SMBB = Successors.pop_back_val();
     if (SMBB->isLandingPad()) {
       BB->removeSuccessor(SMBB);
       MBBLPads.push_back(SMBB);
@@ -6001,7 +6040,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
     // executed.
     for (MachineBasicBlock::reverse_iterator
            II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
-      if (!II->getDesc().isCall()) continue;
+      if (!II->isCall()) continue;
 
       DenseMap<unsigned, bool> DefRegs;
       for (MachineInstr::mop_iterator
@@ -6014,9 +6053,19 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
 
       MachineInstrBuilder MIB(&*II);
       for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
-        if (!TRC->contains(SavedRegs[i])) continue;
-        if (!DefRegs[SavedRegs[i]])
-          MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
+        unsigned Reg = SavedRegs[i];
+        if (Subtarget->isThumb2() &&
+            !ARM::tGPRRegisterClass->contains(Reg) &&
+            !ARM::hGPRRegisterClass->contains(Reg))
+          continue;
+        else if (Subtarget->isThumb1Only() &&
+                 !ARM::tGPRRegisterClass->contains(Reg))
+          continue;
+        else if (!Subtarget->isThumb() &&
+                 !ARM::GPRRegisterClass->contains(Reg))
+          continue;
+        if (!DefRegs[Reg])
+          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
       }
 
       break;
@@ -6402,13 +6451,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 
 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                       SDNode *Node) const {
-  const MCInstrDesc *MCID = &MI->getDesc();
-  if (!MCID->hasPostISelHook()) {
+  if (!MI->hasPostISelHook()) {
     assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
            "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
     return;
   }
 
+  const MCInstrDesc *MCID = &MI->getDesc();
   // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
   // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
   // operand is still set to noreg. If needed, set the optional operand's
@@ -6435,7 +6484,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
 
   // Any ARM instruction that sets the 's' bit should specify an optional
   // "cc_out" operand in the last operand position.
-  if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
+  if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
     assert(!NewOpc && "Optional cc_out operand required");
     return;
   }
@@ -7030,13 +7079,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
     SDValue BasePtr = LD->getBasePtr();
     SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
                                  LD->getPointerInfo(), LD->isVolatile(),
-                                 LD->isNonTemporal(), LD->getAlignment());
+                                 LD->isNonTemporal(), LD->isInvariant(),
+                                 LD->getAlignment());
 
     SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                     DAG.getConstant(4, MVT::i32));
     SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
                                  LD->getPointerInfo(), LD->isVolatile(),
-                                 LD->isNonTemporal(),
+                                 LD->isNonTemporal(), LD->isInvariant(),
                                  std::min(4U, LD->getAlignment() / 2));
 
     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
@@ -7928,7 +7978,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
     // will return -0, so vmin can only be used for unsafe math or if one of
     // the operands is known to be nonzero.
     if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
-        !UnsafeFPMath &&
+        !DAG.getTarget().Options.UnsafeFPMath &&
        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
       break;
     Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
@@ -7950,7 +8000,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
     // will return +0, so vmax can only be used for unsafe math or if one of
     // the operands is known to be nonzero.
     if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
-        !UnsafeFPMath &&
+        !DAG.getTarget().Options.UnsafeFPMath &&
        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
      break;
    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
@@ -8105,6 +8155,41 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
   }
 }
 
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+                       unsigned AlignCheck) {
+  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+          (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+                                           unsigned DstAlign, unsigned SrcAlign,
+                                           bool IsZeroVal,
+                                           bool MemcpyStrSrc,
+                                           MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+
+  // See if we can use NEON instructions for this...
+  if (IsZeroVal &&
+      !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+      Subtarget->hasNEON()) {
+    if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+      return MVT::v4i32;
+    } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+      return MVT::v2i32;
+    }
+  }
+
+  // Lowering to i32/i16 if the size permits.
+  if (Size >= 4) {
+    return MVT::i32;
+  } else if (Size >= 2) {
+    return MVT::i16;
+  }
+
+  // Let the target-independent logic figure it out.
+  return MVT::Other;
+}
+
 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
   if (V < 0)
     return false;
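
The addTypeForNEON and LowerVectorFP_TO_INT / LowerVectorINT_TO_FP hunks above keep v4i32 <-> v4f32 conversions as single DAG nodes (NEON has a one-instruction vcvt for that pairing) and unroll all other vector conversion element types. A compile-only sketch of the fast pairing using standard NEON intrinsics, assuming an ARM target built with NEON enabled (the function names here are illustrative, not from the patch):

```cpp
#include <arm_neon.h>

// v4i32 <-> v4f32 is the conversion pairing the patch leaves as one node:
// each of these compiles to a single NEON vcvt.{f32.s32, s32.f32}.
float32x4_t intsToFloats(int32x4_t v)   { return vcvtq_f32_s32(v); }
int32x4_t   floatsToInts(float32x4_t v) { return vcvtq_s32_f32(v); }
```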
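The new ARMISD::VMOVFPIMM path asks ARM_AM::getFP32Imm whether a 32-bit splat is representable as the 8-bit immediate of vmov.f32, i.e. one of the values +/-(16..31)/16 x 2^r with r in [-3, 4]; it returns -1 otherwise. Below is a hedged, standalone sketch of the decode direction following the ARM ARM's VFPExpandImm rule for single precision; vfpExpandImm8 is a name invented for this example, not an LLVM API:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Expand an 8-bit VFP/NEON FP immediate (bits: a bcd efgh) to the float it
// denotes. The IEEE-754 exponent field is NOT(b) : Replicate(b, 5) : cd and
// the fraction field is efgh followed by 19 zero bits.
static float vfpExpandImm8(uint8_t imm8) {
  uint32_t sign = (imm8 >> 7) & 1;   // a
  uint32_t b    = (imm8 >> 6) & 1;   // b
  uint32_t cd   = (imm8 >> 4) & 3;   // c, d
  uint32_t efgh =  imm8       & 0xF; // e..h
  uint32_t exp  = ((b ^ 1u) << 7) | ((b ? 0x1Fu : 0u) << 2) | cd;
  uint32_t bits = (sign << 31) | (exp << 23) | (efgh << 19);
  float f;
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  // 0x70 decodes to 1.0 and 0x00 to 2.0; notably 0.0 is not encodable,
  // which is why unrepresentable splats make the lowering report failure.
  std::printf("%f %f\n", vfpExpandImm8(0x70), vfpExpandImm8(0x00));
}
```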
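The final hunk's getOptimalMemOpType prefers wide NEON store types for zero-filling memsets when both addresses are sufficiently aligned, then falls back to i32/i16 by size. A minimal standalone restatement of that selection logic under the same inputs; pickMemOpType and its string results are hypothetical stand-ins for the real hook's MVT return values:

```cpp
#include <cstdint>
#include <cstdio>

// Same rule as the patch's static helper: an alignment of 0 means "unknown",
// otherwise both alignments must be multiples of AlignCheck.
static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
                       unsigned AlignCheck) {
  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
          (DstAlign == 0 || DstAlign % AlignCheck == 0));
}

static const char *pickMemOpType(uint64_t Size, unsigned DstAlign,
                                 unsigned SrcAlign, bool IsZeroVal,
                                 bool HasNEON, bool NoImplicitFloat) {
  // Wide NEON stores only for zero-fills on NEON cores, and only when the
  // function does not forbid implicit FP/vector register use.
  if (IsZeroVal && !NoImplicitFloat && HasNEON) {
    if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) return "v4i32";
    if (memOpAlign(SrcAlign, DstAlign, 8)  && Size >= 8)  return "v2i32";
  }
  if (Size >= 4) return "i32";
  if (Size >= 2) return "i16";
  return "Other"; // let target-independent lowering decide
}

int main() {
  // memset(p, 0, 32) with 16-byte alignment gets v4i32 stores; the same
  // call with only 4-byte alignment falls back to plain i32 stores.
  std::printf("%s\n", pickMemOpType(32, 16, 0, true, true, false));
  std::printf("%s\n", pickMemOpType(32, 4, 0, true, true, false));
}
```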
