diff options
Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 218 |
1 files changed, 117 insertions, 101 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5b3e31f..ef96e56 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -781,6 +781,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && @@ -833,21 +835,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setSchedulingPreference(Sched::Hybrid); //// temporary - rewrite interface to use type - maxStoresPerMemset = 8; - maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; - maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores - maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; - maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores - maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemset = 8; + MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores + MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores + MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); - benefitFromCodePlacementOpt = true; + BenefitFromCodePlacementOpt = true; // Prefer likely predicted branches to selects on out-of-order cores. - predictableSelectIsExpensive = Subtarget->isLikeA9(); + PredictableSelectIsExpensive = Subtarget->isLikeA9(); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); } @@ -1926,15 +1928,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, isVarArg)); - // If this is the first return lowered for this function, add - // the regs to the liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - SDValue Flag; + SmallVector<SDValue, 4> RetOps; + RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -1963,10 +1959,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain, Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs.getValue(1), Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc // Extract the 2nd half and fall through to handle it as an f64 value. @@ -1979,6 +1977,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), Flag); @@ -1988,15 +1987,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are // stuck together, avoiding something bad. Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } - SDValue result; + // Update chain and glue. + RetOps[0] = Chain; if (Flag.getNode()) - result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - else // Return Void - result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + RetOps.push_back(Flag); - return result; + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, + RetOps.data(), RetOps.size()); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2576,7 +2576,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, } // The remaining GPRs hold either the beginning of variable-argument -// data, or the beginning of an aggregate passed by value (usuall +// data, or the beginning of an aggregate passed by value (usually // byval). Either way, we allocate stack slots adjacent to the data // provided by our caller, and store the unallocated registers there. // If this is a variadic function, the va_list pointer will begin with @@ -4294,6 +4294,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return true; } +/// \return true if this is a reverse operation on an vector. +static bool isReverseMask(ArrayRef<int> M, EVT VT) { + unsigned NumElts = VT.getVectorNumElements(); + // Make sure the mask has the right size. + if (NumElts != M.size()) + return false; + + // Look for <15, ..., 3, -1, 1, 0>. + for (unsigned i = 0; i != NumElts; ++i) + if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i)) + return false; + + return true; +} + // If N is an integer constant that can be moved into a register in one // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. @@ -4689,7 +4704,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isVZIPMask(M, VT, WhichResult) || isVTRN_v_undef_Mask(M, VT, WhichResult) || isVUZP_v_undef_Mask(M, VT, WhichResult) || - isVZIP_v_undef_Mask(M, VT, WhichResult)); + isVZIP_v_undef_Mask(M, VT, WhichResult) || + ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit @@ -4793,6 +4809,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, &VTBLMask[0], 8)); } +static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, + SelectionDAG &DAG) { + DebugLoc DL = Op.getDebugLoc(); + SDValue OpLHS = Op.getOperand(0); + EVT VT = OpLHS.getValueType(); + + assert((VT == MVT::v8i16 || VT == MVT::v16i8) && + "Expect an v8i16/v16i8 type"); + OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); + // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, + // extract the first 8 bytes into the top double word and the last 8 bytes + // into the bottom double word. The v8i16 case is similar. + unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; + return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, + DAG.getConstant(ExtractNum, MVT::i32)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4930,6 +4963,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) + return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); + if (VT == MVT::v8i8) { SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); if (NewOp.getNode()) @@ -5967,9 +6003,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); } - unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD; - unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD; - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; if (IsCmpxchg || IsMinMax) @@ -6007,42 +6040,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // cmp storesuccess, #0 // bne- loopMBB // fallthrough --> exitMBB - // - // Note that the registers are explicitly specified because there is not any - // way to force the register allocator to allocate a register pair. - // - // FIXME: The hardcoded registers are not necessary for Thumb2, but we - // need to properly enforce the restriction that the two output registers - // for ldrexd must be different. BB = loopMBB; + // Load - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned GPRPair2; - if (IsMinMax) { - //We need an extra double register for doing min/max. - unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(undef) - .addReg(vallo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2) - .addReg(r1) - .addReg(valhi) - .addImm(ARM::gsub_1); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) + .addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr)); + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) + .addReg(GPRPair0, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); } - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); - + unsigned StoreLo, StoreHi; if (IsCmpxchg) { // Add early exit for (unsigned i = 0; i < 2; i++) { @@ -6058,19 +6075,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, } // Copy to physregs for strexd - unsigned setlo = MI->getOperand(5).getReg(); - unsigned sethi = MI->getOperand(6).getReg(); - unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(undef) - .addReg(setlo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(sethi) - .addImm(ARM::gsub_1); + StoreLo = MI->getOperand(5).getReg(); + StoreHi = MI->getOperand(6).getReg(); } else if (Op1) { // Perform binary operation unsigned tmpRegLo = MRI.createVirtualRegister(TRC); @@ -6082,32 +6088,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addReg(desthi).addReg(valhi)) .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(tmpRegLo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(tmpRegHi) - .addImm(ARM::gsub_1); + StoreLo = tmpRegLo; + StoreHi = tmpRegHi; } else { // Copy to physregs for strexd - unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) - .addReg(UndefPair) - .addReg(vallo) - .addImm(ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1) - .addReg(r1) - .addReg(valhi) - .addImm(ARM::gsub_1); + StoreLo = vallo; + StoreHi = valhi; } - unsigned GPRPairStore = GPRPair1; if (IsMinMax) { // Compare and branch to exit block. BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) @@ -6115,12 +6102,33 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, BB->addSuccessor(exitMBB); BB->addSuccessor(contBB); BB = contBB; - GPRPairStore = GPRPair2; + StoreLo = vallo; + StoreHi = valhi; } // Store - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) - .addReg(GPRPairStore).addReg(ptr)); + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) + .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); + } else { + // Marshal a pair... + unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) + .addReg(UndefPair) + .addReg(StoreLo) + .addImm(ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) + .addReg(r1) + .addReg(StoreHi) + .addImm(ARM::gsub_1); + + // ...and store it + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) + .addReg(StorePair).addReg(ptr)); + } // Cmp+jump AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) .addReg(storesuccess).addImm(0)); @@ -6329,7 +6337,16 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { DispatchBB->setIsLandingPad(); MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); - BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP)); + unsigned trap_opcode; + if (Subtarget->isThumb()) { + trap_opcode = ARM::tTRAP; + } else { + if (Subtarget->useNaClTrap()) + trap_opcode = ARM::TRAPNaCl; + else + trap_opcode = ARM::TRAP; + } + BuildMI(TrapBB, dl, TII->get(trap_opcode)); DispatchBB->addSuccessor(TrapBB); MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); @@ -7123,7 +7140,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LE); + /*IsMinMax*/ true, ARMCC::LT); case ARM::ATOMMAX6432: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, @@ -7133,7 +7150,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, - /*IsMinMax*/ true, ARMCC::LS); + /*IsMinMax*/ true, ARMCC::LO); case ARM::ATOMUMAX6432: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, @@ -10343,4 +10360,3 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } - |