aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target/ARM/ARMISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp218
1 files changed, 117 insertions, 101 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 5b3e31f..ef96e56 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -781,6 +781,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
@@ -833,21 +835,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
- maxStoresPerMemset = 8;
- maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
- maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
- maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
- maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemset = 8;
+ MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+ MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+ MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+ MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+ MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- benefitFromCodePlacementOpt = true;
+ BenefitFromCodePlacementOpt = true;
// Prefer likely predicted branches to selects on out-of-order cores.
- predictableSelectIsExpensive = Subtarget->isLikeA9();
+ PredictableSelectIsExpensive = Subtarget->isLikeA9();
setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}
@@ -1926,15 +1928,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
isVarArg));
- // If this is the first return lowered for this function, add
- // the regs to the liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
SDValue Flag;
+ SmallVector<SDValue, 4> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
@@ -1963,10 +1959,12 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
HalfGPRs.getValue(1), Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
// Extract the 2nd half and fall through to handle it as an f64 value.
@@ -1979,6 +1977,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
Flag);
@@ -1988,15 +1987,16 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are
// stuck together, avoiding something bad.
Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
- SDValue result;
+ // Update chain and glue.
+ RetOps[0] = Chain;
if (Flag.getNode())
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- else // Return Void
- result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);
+ RetOps.push_back(Flag);
- return result;
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
+ RetOps.data(), RetOps.size());
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2576,7 +2576,7 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
}
// The remaining GPRs hold either the beginning of variable-argument
-// data, or the beginning of an aggregate passed by value (usuall
+// data, or the beginning of an aggregate passed by value (usually
// byval). Either way, we allocate stack slots adjacent to the data
// provided by our caller, and store the unallocated registers there.
// If this is a variadic function, the va_list pointer will begin with
@@ -4294,6 +4294,21 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return true;
}
+/// \return true if this is a reverse operation on an vector.
+static bool isReverseMask(ArrayRef<int> M, EVT VT) {
+ unsigned NumElts = VT.getVectorNumElements();
+ // Make sure the mask has the right size.
+ if (NumElts != M.size())
+ return false;
+
+ // Look for <15, ..., 3, -1, 1, 0>.
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
+ return false;
+
+ return true;
+}
+
// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
@@ -4689,7 +4704,8 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVZIPMask(M, VT, WhichResult) ||
isVTRN_v_undef_Mask(M, VT, WhichResult) ||
isVUZP_v_undef_Mask(M, VT, WhichResult) ||
- isVZIP_v_undef_Mask(M, VT, WhichResult));
+ isVZIP_v_undef_Mask(M, VT, WhichResult) ||
+ ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -4793,6 +4809,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
&VTBLMask[0], 8));
}
+static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
+ SelectionDAG &DAG) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue OpLHS = Op.getOperand(0);
+ EVT VT = OpLHS.getValueType();
+
+ assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ "Expect an v8i16/v16i8 type");
+ OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
+ // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
+ // extract the first 8 bytes into the top double word and the last 8 bytes
+ // into the bottom double word. The v8i16 case is similar.
+ unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
+ return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
+ DAG.getConstant(ExtractNum, MVT::i32));
+}
+
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
@@ -4930,6 +4963,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
+ if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+
if (VT == MVT::v8i8) {
SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
if (NewOp.getNode())
@@ -5967,9 +6003,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
}
- unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
- unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
-
MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *contBB = 0, *cont2BB = 0;
if (IsCmpxchg || IsMinMax)
@@ -6007,42 +6040,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
// cmp storesuccess, #0
// bne- loopMBB
// fallthrough --> exitMBB
- //
- // Note that the registers are explicitly specified because there is not any
- // way to force the register allocator to allocate a register pair.
- //
- // FIXME: The hardcoded registers are not necessary for Thumb2, but we
- // need to properly enforce the restriction that the two output registers
- // for ldrexd must be different.
BB = loopMBB;
+
// Load
- unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned GPRPair2;
- if (IsMinMax) {
- //We need an extra double register for doing min/max.
- unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(undef)
- .addReg(vallo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2)
- .addReg(r1)
- .addReg(valhi)
- .addImm(ARM::gsub_1);
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
+ .addReg(destlo, RegState::Define)
+ .addReg(desthi, RegState::Define)
+ .addReg(ptr));
+ } else {
+ unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
+ .addReg(GPRPair0, RegState::Define).addReg(ptr));
+ // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+ .addReg(GPRPair0, 0, ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+ .addReg(GPRPair0, 0, ARM::gsub_1);
}
- AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
- .addReg(GPRPair0, RegState::Define).addReg(ptr));
- // Copy r2/r3 into dest. (This copy will normally be coalesced.)
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
- .addReg(GPRPair0, 0, ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
- .addReg(GPRPair0, 0, ARM::gsub_1);
-
+ unsigned StoreLo, StoreHi;
if (IsCmpxchg) {
// Add early exit
for (unsigned i = 0; i < 2; i++) {
@@ -6058,19 +6075,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
}
// Copy to physregs for strexd
- unsigned setlo = MI->getOperand(5).getReg();
- unsigned sethi = MI->getOperand(6).getReg();
- unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(undef)
- .addReg(setlo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
- .addReg(r1)
- .addReg(sethi)
- .addImm(ARM::gsub_1);
+ StoreLo = MI->getOperand(5).getReg();
+ StoreHi = MI->getOperand(6).getReg();
} else if (Op1) {
// Perform binary operation
unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
@@ -6082,32 +6088,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
.addReg(desthi).addReg(valhi))
.addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
- unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(UndefPair)
- .addReg(tmpRegLo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
- .addReg(r1)
- .addReg(tmpRegHi)
- .addImm(ARM::gsub_1);
+ StoreLo = tmpRegLo;
+ StoreHi = tmpRegHi;
} else {
// Copy to physregs for strexd
- unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
- BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
- .addReg(UndefPair)
- .addReg(vallo)
- .addImm(ARM::gsub_0);
- BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
- .addReg(r1)
- .addReg(valhi)
- .addImm(ARM::gsub_1);
+ StoreLo = vallo;
+ StoreHi = valhi;
}
- unsigned GPRPairStore = GPRPair1;
if (IsMinMax) {
// Compare and branch to exit block.
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -6115,12 +6102,33 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
BB->addSuccessor(exitMBB);
BB->addSuccessor(contBB);
BB = contBB;
- GPRPairStore = GPRPair2;
+ StoreLo = vallo;
+ StoreHi = valhi;
}
// Store
- AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
- .addReg(GPRPairStore).addReg(ptr));
+ if (isThumb2) {
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+ .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
+ } else {
+ // Marshal a pair...
+ unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+ BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+ .addReg(UndefPair)
+ .addReg(StoreLo)
+ .addImm(ARM::gsub_0);
+ BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
+ .addReg(r1)
+ .addReg(StoreHi)
+ .addImm(ARM::gsub_1);
+
+ // ...and store it
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+ .addReg(StorePair).addReg(ptr));
+ }
// Cmp+jump
AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
.addReg(storesuccess).addImm(0));
@@ -6329,7 +6337,16 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DispatchBB->setIsLandingPad();
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
- BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+ unsigned trap_opcode;
+ if (Subtarget->isThumb()) {
+ trap_opcode = ARM::tTRAP;
+ } else {
+ if (Subtarget->useNaClTrap())
+ trap_opcode = ARM::TRAPNaCl;
+ else
+ trap_opcode = ARM::TRAP;
+ }
+ BuildMI(TrapBB, dl, TII->get(trap_opcode));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
@@ -7123,7 +7140,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LE);
+ /*IsMinMax*/ true, ARMCC::LT);
case ARM::ATOMMAX6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
@@ -7133,7 +7150,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
/*NeedsCarry*/ true, /*IsCmpxchg*/false,
- /*IsMinMax*/ true, ARMCC::LS);
+ /*IsMinMax*/ true, ARMCC::LO);
case ARM::ATOMUMAX6432:
return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
@@ -10343,4 +10360,3 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
}
-