Diffstat (limited to 'lib/Target/ARM')
-rw-r--r--  lib/Target/ARM/ARM.td                             |  22
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp               |  79
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h                 |   8
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp            |   8
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h              |   1
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h                   |   6
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td                  |  14
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp                    |  12
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp               |  18
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp                |  88
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp                | 253
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h                  |  18
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td                    | 186
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td                    |  41
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td                  |  22
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp          |  20
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h           |  10
-rw-r--r--  lib/Target/ARM/ARMSchedule.td                     |  71
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td                   |  53
-rw-r--r--  lib/Target/ARM/ARMScheduleSwift.td                |  61
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp                   |  12
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h                     |  13
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp               |   5
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp         |  90
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp         |  54
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp   |  34
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp     |  18
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h       |   7
-rw-r--r--  lib/Target/ARM/LICENSE.TXT (was -rwxr-xr-x)       |   0
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp    | 159
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h         |  13
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp    | 198
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h      | 114
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt        |   1
-rw-r--r--  lib/Target/ARM/README-Thumb.txt                   |   2
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp            |  14
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp             |   2
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp                |  84
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp            | 147
39 files changed, 1491 insertions(+), 467 deletions(-)
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 46915ee..2d747091 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -59,6 +59,8 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
"Floating point unit supports single precision only">;
+def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true",
+ "Enable support for TrustZone security extensions">;
// Some processors have FP multiply-accumulate instructions that don't
// play nicely with other VFP / NEON instructions, and it's generally better
@@ -143,32 +145,34 @@ include "ARMSchedule.td"
// ARM processor families.
def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
"Cortex-A5 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk,
+ FeatureTrustZone]>;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
- [FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVMLxForwarding, FeatureT2XtPk,
+ FeatureTrustZone]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
[FeatureVMLxForwarding,
FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR]>;
+ FeatureAvoidPartialCPSR,
+ FeatureTrustZone]>;
def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
"Swift ARM processors",
[FeatureNEONForFP, FeatureT2XtPk,
FeatureVFP4, FeatureMP, FeatureHWDiv,
FeatureHWDivARM, FeatureAvoidPartialCPSR,
FeatureAvoidMOVsShOp,
- FeatureHasSlowFPVMLx]>;
+ FeatureHasSlowFPVMLx, FeatureTrustZone]>;
// FIXME: It has not been determined if A15 has these features.
def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
"Cortex-A15 ARM processors",
[FeatureT2XtPk, FeatureFP16,
- FeatureAvoidPartialCPSR]>;
+ FeatureAvoidPartialCPSR,
+ FeatureTrustZone]>;
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
[FeatureSlowFPBrcc, FeatureHWDivARM,
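
Note: FeatureTrustZone follows the usual SubtargetFeature plumbing: the
"trustzone" string becomes an -mattr flag and "HasTrustZone" names the
subtarget member it sets. A hedged sketch of the query side (the member and
accessor live in ARMSubtarget.h, which this commit also touches per the
diffstat, but is not shown in this section):

    bool HasTrustZone;                                 // set from feature bits
    bool hasTrustZone() const { return HasTrustZone; }

The feature is enabled explicitly with, e.g., llc -mtriple=armv7
-mattr=+trustzone, or implicitly by the processor definitions above
(Cortex-A5/A8/A9/A15, Swift).
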
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index ed8b9cd..0d1417d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -747,10 +747,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Mov->addRegisterKilled(SrcReg, TRI);
}
-static const
-MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
- unsigned Reg, unsigned SubIdx, unsigned State,
- const TargetRegisterInfo *TRI) {
+const MachineInstrBuilder &
+ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const {
if (!SubIdx)
return MIB.addReg(Reg, State);
@@ -795,12 +795,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
- .addFrameIndex(FI))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
- AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ if (Subtarget.hasV5TEOps()) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+ // Fall back to the STM instruction, which has existed since the dawn
+ // of time.
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ }
} else
llvm_unreachable("Unknown reg class!");
break;
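
Note: the two branches above differ only in which instruction carries the
paired spill. An illustrative sketch of the difference (register pair and
offset assumed, not taken from a real compile):

    // v5TE+ path:  STRD r4, r5, [sp, #8]   @ one doubleword access
    // fallback:    STMIA sp, {r4, r5}      @ multiple-store form
    // ARM-mode STRD needs an adjacent even/odd register pair, which the
    // gsub_0/gsub_1 halves of a GPRPair always form.
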
@@ -948,7 +958,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
@@ -975,12 +984,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
- unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MachineInstrBuilder MIB;
+
+ if (Subtarget.hasV5TEOps()) {
+ MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
+
+ AddDefaultPred(MIB);
+ } else {
+ // Fall back to the LDM instruction, which has existed since the dawn
+ // of time.
+ MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
+ .addFrameIndex(FI).addMemOperand(MMO));
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ }
+
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
@@ -3734,9 +3755,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
- // A9-like cores are particularly picky about mixing the two and want these
+ // CortexA9 is particularly picky about mixing the two and wants these
// converted.
- if (Subtarget.isLikeA9() && !isPredicated(MI) &&
+ if (Subtarget.isCortexA9() && !isPredicated(MI) &&
(MI->getOpcode() == ARM::VMOVRS ||
MI->getOpcode() == ARM::VMOVSR ||
MI->getOpcode() == ARM::VMOVS))
@@ -4023,14 +4044,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
-
-
unsigned ARMBaseInstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI,
unsigned OpNum,
const TargetRegisterInfo *TRI) const {
- // Only Swift has partial register update problems.
- if (!SwiftPartialUpdateClearance || !Subtarget.isSwift())
+ if (!SwiftPartialUpdateClearance ||
+ !(Subtarget.isSwift() || Subtarget.isCortexA15()))
return 0;
assert(TRI && "Need TRI instance");
@@ -4056,7 +4075,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI,
// Explicitly reads the dependency.
case ARM::VLD1LNd32:
- UseOp = 1;
+ UseOp = 3;
break;
default:
return 0;
@@ -4125,3 +4144,15 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
+
+bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
+ unsigned ShOpVal = MI->getOperand(3).getImm();
+ unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
+ // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
+ if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
+ ((ShImm == 1 || ShImm == 2) &&
+ ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
+ return true;
+
+ return false;
+}
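
Note: a hedged usage sketch for the new hook (the caller and the latency tweak
are illustrative, not part of this commit). It must only be applied to a
data-processing MI whose operand 3 holds the shifter-operand immediate, as the
implementation above assumes:

    if (Subtarget.isSwift() && TII->isSwiftFastImmShift(MI))
      --Latency; // lsl #1, lsl #2 and lsr #1 run one cycle faster on Swift
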
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2698132..2ef659c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -141,6 +141,10 @@ public:
MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+ const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
+ unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) const;
+
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
@@ -314,6 +318,10 @@ public:
bool canCauseFpMLxStall(unsigned Opcode) const {
return MLxHazardOpcodes.count(Opcode);
}
+
+ /// Returns true if the instruction has a shift by immediate that can be
+ /// executed in one cycle less.
+ bool isSwiftFastImmShift(const MachineInstr *MI) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index abdd251..b0d34a7 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -75,6 +75,12 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
}
const uint32_t*
+ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+ return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
+}
+
+const uint32_t*
ARMBaseRegisterInfo::getNoPreservedMask() const {
return CSR_NoRegs_RegMask;
}
@@ -680,7 +686,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// means the stack pointer cannot be used to access the emergency spill slot
// when !hasReservedCallFrame().
#ifndef NDEBUG
- if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){
assert(TFI->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 725033b..0679919 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -96,6 +96,7 @@ public:
/// Code Generation virtual methods...
const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
const uint32_t *getNoPreservedMask() const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index e6e8c3d..4f94ad2 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
+
+ // If only R3 was left unallocated, we still must waste it.
+ Reg = State.AllocateReg(GPRArgRegs, 4);
+ assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
// For the 2nd half of a v2f64, do not just fail.
if (CanFail)
return false;
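
Note: a worked example of the branch above (argument list assumed): for
f(i32, i32, i32, f64) under soft-float AAPCS, R0-R2 take the integer arguments
and no even/odd pair remains for the f64, so it goes to the stack; R3 must
still be allocated ("wasted") so that no later argument is assigned to a
register behind the f64.
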
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index b378b96..8ff666e 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
// (and the same is true for f64 if VFP is not enabled)
CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
- CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
- "ArgFlags.getOrigAlign() != 8",
+ CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
@@ -195,10 +194,21 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
+// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
+// and the pointer return value are both passed in R0 in these cases, this can
+// be partially modelled by treating R0 as a callee-saved register.
+// Only the resulting RegMask is used; the SaveList is ignored.
+def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6,
+ R5, R4, (sequence "D%u", 15, 8),
+ R0)>;
+
// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
// Also save R7-R4 first to match the stack frame fixed spill areas.
def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
+def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
+ (sub CSR_AAPCS_ThisReturn, R9))>;
+
// GHC set of callee saved regs is empty as all those regs are
// used for passing STG regs around
// add is a workaround for not being able to compile empty list:
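
Note: the source-level pattern this models, sketched in C++ (illustrative, not
taken from the commit):

    struct Node {
      Node *init(int v) { val = v; return this; } // 'this' comes back in R0
      int val;
    };
    // In n->init(1)->init(2), the caller knows R0 still holds the pointer
    // after each call, so under the ThisReturn mask it need not stash 'this'
    // in a callee-saved register across the call.
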
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 29fcd40..5d45f64 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -144,8 +144,8 @@ class ARMFastISel : public FastISel {
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
- virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI);
+ virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
virtual bool FastLowerArguments();
private:
#include "ARMGenFastISel.inc"
@@ -2605,7 +2605,7 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
unsigned Opc;
bool isBoolZext = false;
- const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC;
switch (SrcVT.SimpleTy) {
default: return 0;
case MVT::i16:
@@ -2797,12 +2797,12 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return false;
}
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// \brief The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
/// successful.
-bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI) {
+bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
// Verify we have a legal type before going any further.
MVT VT;
if (!isLoadTypeLegal(LI->getType(), VT))
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 3b12408..483802b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -141,7 +141,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -159,8 +159,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
return;
// Allocate the vararg register save area. This is not counted in NumBytes.
- if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize,
+ if (ArgRegsSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
if (!AFI->hasStackFrame()) {
@@ -357,7 +357,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -471,8 +471,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MBBI = NewMI;
}
- if (VARegSaveSize)
- emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
+ if (ArgRegsSaveSize)
+ emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
}
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
@@ -1003,7 +1003,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ bool isVarArg = AFI->getArgRegsSaveSize() > 0;
unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
// The emitPopInst calls below do not insert reloads for the aligned DPRCS2
@@ -1174,7 +1174,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (AFI->isThumb1OnlyFunction()) {
// Spill LR if Thumb1 function uses variable length argument lists.
- if (AFI->getVarArgsRegSaveSize() > 0)
+ if (AFI->getArgRegsSaveSize() > 0)
MRI.setPhysRegUsed(ARM::LR);
// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
@@ -1368,7 +1368,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2c51de2..9e1782e 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1469,14 +1469,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
- MVT::i32, MVT::Other, Ops, 5);
+ MVT::i32, MVT::Other, Ops);
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32,
- MVT::i32, MVT::Other, Ops, 6);
+ MVT::i32, MVT::Other, Ops);
}
}
@@ -1525,7 +1525,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
SDValue Ops[]= { Base, Offset, getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32), Chain };
return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32,
- MVT::Other, Ops, 5);
+ MVT::Other, Ops);
}
return NULL;
@@ -1539,7 +1539,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form a D register from a pair of S registers.
@@ -1550,7 +1550,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form a quad register from a pair of D registers.
@@ -1560,7 +1560,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive D registers from a pair of Q registers.
@@ -1570,7 +1570,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive S registers.
@@ -1585,7 +1585,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive D registers.
@@ -1599,7 +1599,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// \brief Form 4 consecutive Q registers.
@@ -1613,7 +1613,7 @@ SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
@@ -1761,7 +1761,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
} else {
// Otherwise, quad registers are loaded with two separate instructions,
@@ -1774,7 +1774,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
- ResTy, AddrTy, MVT::Other, OpsA, 7);
+ ResTy, AddrTy, MVT::Other, OpsA);
Chain = SDValue(VLdA, 2);
// Load the odd subregs.
@@ -1791,8 +1791,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
- Ops.data(), Ops.size());
+ VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
}
// Transfer memoperands.
@@ -1913,8 +1912,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
- SDNode *VSt =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
// Transfer memoperands.
cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
@@ -1939,7 +1937,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
MemAddr.getValueType(),
- MVT::Other, OpsA, 7);
+ MVT::Other, OpsA);
cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
Chain = SDValue(VStA, 1);
@@ -1958,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Reg0);
Ops.push_back(Chain);
SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
- Ops.data(), Ops.size());
+ Ops);
cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
return VStB;
}
@@ -2063,8 +2061,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
QOpcodes[OpcodeIndex]);
- SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys,
- Ops.data(), Ops.size());
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
if (!IsLoad)
return VLdLn;
@@ -2150,8 +2147,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- SDNode *VLdDup =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+ SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
SuperReg = SDValue(VLdDup, 0);
@@ -2197,7 +2193,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
Ops.push_back(getAL(CurDAG)); // predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
- return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
@@ -2542,7 +2538,7 @@ SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
MVT::i32, MVT::i32, MVT::Other,
- Ops.data() ,Ops.size());
+ Ops);
cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
return ResNode;
}
@@ -2599,7 +2595,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
- Ops, 4);
+ Ops);
} else {
SDValue Ops[] = {
CPIdx,
@@ -2609,7 +2605,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getEntryNode()
};
ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
- Ops, 5);
+ Ops);
}
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
return NULL;
@@ -2719,7 +2715,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MVT::i32);
SDValue Ops[] = { N0.getOperand(0), Imm16,
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
}
break;
@@ -2733,16 +2729,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
- getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
- CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMULL : ARM::UMULLv5,
- dl, MVT::i32, MVT::i32, Ops, 5);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ISD::SMUL_LOHI: {
@@ -2751,14 +2746,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMULL : ARM::SMULLv5,
- dl, MVT::i32, MVT::i32, Ops, 5);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ARMISD::UMLAL:{
@@ -2766,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32)};
- return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
@@ -2774,7 +2769,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::UMLAL : ARM::UMLALv5,
- dl, MVT::i32, MVT::i32, Ops, 7);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ARMISD::SMLAL:{
@@ -2782,7 +2777,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32)};
- return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops, 6);
+ return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
}else{
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), getAL(CurDAG),
@@ -2790,7 +2785,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->getRegister(0, MVT::i32) };
return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
ARM::SMLAL : ARM::SMLALv5,
- dl, MVT::i32, MVT::i32, Ops, 7);
+ dl, MVT::i32, MVT::i32, Ops);
}
}
case ISD::LOAD: {
@@ -2833,7 +2828,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
- MVT::Glue, Ops, 5);
+ MVT::Glue, Ops);
Chain = SDValue(ResNode, 0);
if (N->getNumValues() == 2) {
InFlag = SDValue(ResNode, 1);
@@ -2863,7 +2858,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::VUZP: {
unsigned Opc = 0;
@@ -2883,7 +2878,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::VTRN: {
unsigned Opc = 0;
@@ -2902,7 +2897,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
- return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
}
case ARMISD::BUILD_VECTOR: {
EVT VecVT = N->getValueType(0);
@@ -3147,8 +3142,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(getAL(CurDAG));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(Chain);
- SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
- Ops.size());
+ SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3211,8 +3205,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned NewOpc = isThumb ? ARM::t2STREXD : ARM::STREXD;
- SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops.data(),
- Ops.size());
+ SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -3398,7 +3391,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(1));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
}
case ARMISD::VTBL2: {
DebugLoc dl = N->getDebugLoc();
@@ -3414,8 +3407,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
- Ops.data(), Ops.size());
+ return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
}
case ISD::CONCAT_VECTORS:
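
Note: every change in this file is the same mechanical cleanup: the
SelectionDAG::getMachineNode overloads now take ArrayRef<SDValue>, so the
explicit operand counts ("Ops, 5", "Ops.data(), Ops.size()") are dropped and
the length is deduced from the array. A hedged sketch of the overload involved
(the exact form lives in SelectionDAG.h):

    MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
                                  ArrayRef<SDValue> Ops);
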
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 514971f..9475f1b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ // Custom expand long extensions to vectors.
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
+
// NEON does not have single instruction CTPOP for vectors with element
// types wider than 8-bits. However, custom lowering can leverage the
// v8i8/v16i8 vcnt instruction.
@@ -719,7 +729,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
(Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
// Custom lowering for 64-bit ops
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
@@ -737,7 +746,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setInsertFencesForAtomic(true);
} else {
// Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
@@ -755,8 +763,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Unordered/Monotonic case.
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
- // Since the libcalls include locking, fold in the fences
- setShouldFoldAtomicFences(true);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -870,8 +876,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- BenefitFromCodePlacementOpt = true;
-
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->isLikeA9();
@@ -1230,7 +1234,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1244,6 +1249,15 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
+ // Pass the 'this' value directly from the argument to the return value,
+ // to avoid register unit interference.
+ if (i == 0 && isThisReturn) {
+ assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
+ "unexpected return calling convention register assignment");
+ InVals.push_back(ThisVal);
+ continue;
+ }
+
SDValue Val;
if (VA.needsCustom()) {
// Handle f64 or half of a v2f64.
@@ -1355,21 +1369,22 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
- bool IsSibCall = false;
+ bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool isThisReturn = false;
+ bool isSibCall = false;
// Disable tail calls if they're not supported.
if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
isTailCall = false;
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
++NumTailCalls;
- IsSibCall = true;
+ isSibCall = true;
}
}
@@ -1385,12 +1400,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned NumBytes = CCInfo.getNextStackOffset();
// For tail calls, memory operands are available in our caller's stack.
- if (IsSibCall)
+ if (isSibCall)
NumBytes = 0;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- if (!IsSibCall)
+ if (!isSibCall)
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -1452,6 +1467,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
StackPtr, MemOpChains, Flags);
}
} else if (VA.isRegLoc()) {
+ if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
+ assert(VA.getLocVT() == MVT::i32 &&
+ "unexpected calling convention register assignment");
+ assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
+ "unexpected use of 'returned'");
+ isThisReturn = true;
+ }
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else if (isByVal) {
assert(VA.isMemLoc());
@@ -1491,7 +1513,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops, array_lengthof(Ops)));
}
- } else if (!IsSibCall) {
+ } else if (!isSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1531,7 +1553,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
- InFlag =SDValue();
+ InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -1672,8 +1694,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
+ const uint32_t *Mask;
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
+ if (isThisReturn)
+ // For 'this' returns, use the R0-preserving mask
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ else
+ Mask = ARI->getCallPreservedMask(CallConv);
+
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -1695,8 +1724,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
- dl, DAG, InVals);
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, isThisReturn,
+ isThisReturn ? OutVals[0] : SDValue());
}
/// HandleByVal - Every parameter *after* a byval parameter is passed
@@ -1711,6 +1741,7 @@ ARMTargetLowering::HandleByVal(
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
if ((!State->isFirstByValRegValid()) &&
+ (!Subtarget->isAAPCS_ABI() || State->getNextStackOffset() == 0) &&
(ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
@@ -1866,7 +1897,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// local frame.
const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
getInfo<ARMFunctionInfo>();
- if (AFI_Caller->getVarArgsRegSaveSize())
+ if (AFI_Caller->getArgRegsSaveSize())
return false;
// If the callee takes no arguments then go on to check the results of the
@@ -2453,35 +2484,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
}
-static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
- DebugLoc dl = Op.getDebugLoc();
- if (!Subtarget->hasDataBarrier()) {
- // Some ARMv6 cpus can support data barriers with an mcr instruction.
- // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
- // here.
- assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
- "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- }
-
- SDValue Op5 = Op.getOperand(5);
- bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
- unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
-
- ARM_MB::MemBOpt DMBOpt;
- if (isDeviceBarrier)
- DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
- else
- DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
- return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(DMBOpt, MVT::i32));
-}
-
-
static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// FIXME: handle "fence singlethread" more efficiently.
@@ -2578,7 +2580,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned &VARegSize, unsigned &VARegSaveSize)
+ unsigned &ArgRegsSize,
+ unsigned &ArgRegsSaveSize)
const {
unsigned NumGPRs;
if (CCInfo.isFirstByValRegValid())
@@ -2592,8 +2595,8 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
}
unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
- VARegSize = NumGPRs * 4;
- VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+ ArgRegsSize = NumGPRs * 4;
+ ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1);
}
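
Note: the last two added lines are a standard round-up to the stack alignment.
Worked example (values assumed): three unallocated GPRs give ArgRegsSize = 12,
and with an 8-byte stack alignment ArgRegsSaveSize = (12 + 7) & ~7 = 16.
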
// The remaining GPRs hold either the beginning of variable-argument
@@ -2603,13 +2606,26 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
// If this is a variadic function, the va_list pointer will begin with
// these values; otherwise, this reassembles a (byval) structure that
// was split between registers and memory.
-void
-ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
- DebugLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned OffsetFromOrigArg,
- unsigned ArgOffset,
- bool ForceMutable) const {
+// Return: the frame index the registers were stored into.
+int
+ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
+
+ // Currently, two use-cases are possible:
+ // Case #1. A non-variadic function where we meet the first byval parameter:
+ //          set up the first unallocated register as the first byval
+ //          register and eat all remaining registers (both actions are
+ //          performed by the HandleByVal method). Then, here, we initialize
+ //          the stack frame with "store-reg" instructions.
+ // Case #2. A variadic function that contains no byval parameters:
+ //          likewise, eat all remaining unallocated registers and
+ //          initialize the stack frame.
+
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2621,19 +2637,22 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
(GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
}
- unsigned VARegSize, VARegSaveSize;
- computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
- if (VARegSaveSize) {
- // If this function is vararg, store any remaining integer argument regs
- // to their spots on the stack so that they may be loaded by deferencing
- // the result of va_next.
- AFI->setVarArgsRegSaveSize(VARegSaveSize);
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
- ArgOffset + VARegSaveSize
- - VARegSize,
- false));
- SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
- getPointerTy());
+ unsigned ArgRegsSize, ArgRegsSaveSize;
+ computeRegArea(CCInfo, MF, ArgRegsSize, ArgRegsSaveSize);
+
+ // Store any byval regs to their spots on the stack so that they may be
+ // loaded by dereferencing the result of the formal parameter pointer or
+ // va_next. Note: once the stack area for byval/varargs registers has been
+ // initialized, it can't be initialized again.
+ if (!AFI->getArgRegsSaveSize() && ArgRegsSaveSize) {
+
+ AFI->setArgRegsSaveSize(ArgRegsSaveSize);
+
+ int FrameIndex = MFI->CreateFixedObject(
+ ArgRegsSaveSize,
+ ArgOffset + ArgRegsSaveSize - ArgRegsSize,
+ false);
+ SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
@@ -2656,10 +2675,30 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
+ return FrameIndex;
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
+ return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable);
+}
+
+// Set up the stack frame that the va_list pointer will start from.
+void
+ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ unsigned ArgOffset,
+ bool ForceMutable) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Try to store any remaining integer argument regs to their spots on the
+ // stack so that they may be loaded by dereferencing the result of va_next.
+ // If there are no regs to be stored, just point past the last argument
+ // passed via the stack.
+ int FrameIndex =
+ StoreByValRegs(CCInfo, DAG, dl, Chain, 0, 0, ArgOffset, ForceMutable);
+
+ AFI->setVarArgsFrameIndex(FrameIndex);
}
SDValue
@@ -2785,20 +2824,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (!AFI->getVarArgsFrameIndex()) {
- VarArgStyleRegisters(CCInfo, DAG,
- dl, Chain, CurOrigArg,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- true /*force mutable frames*/);
- int VAFrameIndex = AFI->getVarArgsFrameIndex();
- InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
- } else {
- int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
- }
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, CurOrigArg,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
+ InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
@@ -2816,7 +2847,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// varargs
if (isVarArg)
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset());
return Chain;
@@ -3433,6 +3464,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
+/// and size(DestVec) > 128-bits.
+/// This is achieved by performing one extension from the SrcVec, splitting
+/// the result, extending those parts, and then concatenating them into the
+/// destination.
+static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op = N->getOperand(0);
+ EVT SrcVT = Op.getValueType();
+ EVT DestVT = N->getValueType(0);
+
+ assert(DestVT.getSizeInBits() > 128 &&
+ "Custom sext/zext expansion needs >128-bit vector.");
+ // If this is a normal length extension, use the default expansion.
+ if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
+ SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
+ return SDValue();
+
+ DebugLoc dl = N->getDebugLoc();
+ unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ LLVMContext &Ctx = *DAG.getContext();
+ SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
+
+ EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts);
+ EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
+ NumElts/2);
+ EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
+ NumElts/2);
+
+ Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
+ SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(0));
+ SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
+ DAG.getIntPtrConstant(NumElts/2));
+ ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
+ ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
+}
+
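
Note: a shape walk-through of the routine above, using its local names, for
sext v8i8 -> v8i32 (SrcEltSize = 8, NumElts = 8, so MidVT = v8i16,
SplitVT = v4i16, ExtVT = v4i32):

    // Mid     = SIGN_EXTEND       v8i8  -> v8i16
    // SplitLo = EXTRACT_SUBVECTOR Mid, 0         : v4i16
    // SplitHi = EXTRACT_SUBVECTOR Mid, 4         : v4i16
    // ExtLo   = SIGN_EXTEND       v4i16 -> v4i32
    // ExtHi   = SIGN_EXTEND       v4i16 -> v4i32
    // result  = CONCAT_VECTORS ExtLo, ExtHi      : v8i32

Each step is then a legal-sized NEON operation.
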
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -5565,7 +5637,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
@@ -5621,6 +5692,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = ExpandVectorExtension(N, DAG);
+ break;
case ISD::SRL:
case ISD::SRA:
Res = Expand64BitShift(N, DAG, Subtarget);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9ee17f0..46b8438 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -464,7 +464,8 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -473,16 +474,21 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
+ DebugLoc dl, SDValue &Chain,
+ const Value *OrigArg,
+ unsigned OffsetFromOrigArg,
+ unsigned ArgOffset,
+ bool ForceMutable) const;
+
void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
- const Value *OrigArg,
- unsigned OffsetFromOrigArg,
unsigned ArgOffset,
- bool ForceMutable = false)
- const;
+ bool ForceMutable = false) const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
- unsigned &VARegSize, unsigned &VARegSaveSize) const;
+ unsigned &ArgRegsSize,
+ unsigned &ArgRegsSaveSize) const;
virtual SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 9409f35..1bd174e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -221,6 +221,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
AssemblerPredicate<"FeatureMP",
"mp-extensions">;
+def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">,
+ AssemblerPredicate<"FeatureTrustZone",
+ "TrustZone">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">,
@@ -578,6 +581,17 @@ def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+/// imm0_4 predicate - Immediate in the range [0,4].
+def Imm0_4AsmOperand : ImmAsmOperand {
+ let Name = "Imm0_4";
+ let DiagnosticType = "ImmRange0_4";
+}
+def imm0_4 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 5; }]> {
+ let ParserMatchClass = Imm0_4AsmOperand;
+ let DecoderMethod = "DecodeImm0_4";
+}
+
/// imm0_7 predicate - Immediate in the range [0,7].
def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
@@ -741,18 +755,26 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
// addrmode_imm12 := reg +/- imm12
//
def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
-def addrmode_imm12 : Operand<i32>,
+class AddrMode_Imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
// 12-bit immediate operand. Note that instructions using this encode
// #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
// immediate values are as normal.
let EncoderMethod = "getAddrModeImm12OpValue";
- let PrintMethod = "printAddrModeImm12Operand";
let DecoderMethod = "DecodeAddrModeImm12Operand";
let ParserMatchClass = MemImm12OffsetAsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
+
+def addrmode_imm12 : AddrMode_Imm12 {
+ let PrintMethod = "printAddrModeImm12Operand<false>";
+}
+
+def addrmode_imm12_pre : AddrMode_Imm12 {
+ let PrintMethod = "printAddrModeImm12Operand<true>";
+}
+
// ldst_so_reg := reg +/- reg shop imm
//
def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
@@ -852,14 +874,23 @@ def am2offset_imm : Operand<i32>,
//
// FIXME: split into imm vs. reg versions.
def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
-def addrmode3 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectAddrMode3", []> {
+class AddrMode3 : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
- let PrintMethod = "printAddrMode3Operand";
let ParserMatchClass = AddrMode3AsmOperand;
let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
}
+def addrmode3 : AddrMode3 {
+  let PrintMethod = "printAddrMode3Operand<false>";
+}
+
+def addrmode3_pre : AddrMode3 {
+  let PrintMethod = "printAddrMode3Operand<true>";
+}
+
// FIXME: split into imm vs. reg versions.
// FIXME: parser method to handle +/- register.
def AM3OffsetAsmOperand : AsmOperandClass {
@@ -885,15 +916,22 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
// addrmode5 := reg +/- imm8*4
//
def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
-def addrmode5 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode5", []> {
- let PrintMethod = "printAddrMode5Operand";
+class AddrMode5 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let EncoderMethod = "getAddrMode5OpValue";
let DecoderMethod = "DecodeAddrMode5Operand";
let ParserMatchClass = AddrMode5AsmOperand;
let MIOperandInfo = (ops GPR:$base, i32imm);
}
+def addrmode5 : AddrMode5 {
+ let PrintMethod = "printAddrMode5Operand<false>";
+}
+
+def addrmode5_pre : AddrMode5 {
+ let PrintMethod = "printAddrMode5Operand<true>";
+}
+
// addrmode6 := reg with optional alignment
//
def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
@@ -1010,7 +1048,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1022,7 +1061,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1037,7 +1077,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1052,7 +1093,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> {
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1079,7 +1121,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1091,7 +1134,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
iir, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1105,7 +1149,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
def rsi : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1120,7 +1165,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
def rsr : AsI1<opcod, (outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
+ [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1145,24 +1191,28 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>,
+ Sched<[WriteALU, ReadALU]>;
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
4, iir,
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> {
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
let isCommutable = Commutable;
}
def rsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
- so_reg_imm:$shift))]>;
+ so_reg_imm:$shift))]>,
+ Sched<[WriteALUsi, ReadALU]>;
def rsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
- so_reg_reg:$shift))]>;
+ so_reg_reg:$shift))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
}
}
@@ -1174,19 +1224,22 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
4, iii,
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>,
+ Sched<[WriteALU, ReadALU]>;
def rsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
- GPR:$Rn))]>;
+ GPR:$Rn))]>,
+ Sched<[WriteALUsi, ReadALU]>;
def rsr : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
4, iis,
[(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
- GPR:$Rn))]>;
+ GPR:$Rn))]>,
+ Sched<[WriteALUSsr, ReadALUsr]>;
}
}
@@ -1199,7 +1252,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
PatFrag opnode, bit Commutable = 0> {
def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
- [(opnode GPR:$Rn, so_imm:$imm)]> {
+ [(opnode GPR:$Rn, so_imm:$imm)]>,
+ Sched<[WriteCMP, ReadALU]> {
bits<4> Rn;
bits<12> imm;
let Inst{25} = 1;
@@ -1212,7 +1266,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
}
def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
opc, "\t$Rn, $Rm",
- [(opnode GPR:$Rn, GPR:$Rm)]> {
+ [(opnode GPR:$Rn, GPR:$Rm)]>,
+ Sched<[WriteCMP, ReadALU, ReadALU]> {
bits<4> Rn;
bits<4> Rm;
let isCommutable = Commutable;
@@ -1228,7 +1283,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
def rsi : AI1<opcod, (outs),
(ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis,
opc, "\t$Rn, $shift",
- [(opnode GPR:$Rn, so_reg_imm:$shift)]> {
+ [(opnode GPR:$Rn, so_reg_imm:$shift)]>,
+ Sched<[WriteCMPsi, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -1244,7 +1300,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
def rsr : AI1<opcod, (outs),
(ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis,
opc, "\t$Rn, $shift",
- [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]> {
+ [(opnode GPRnopc:$Rn, so_reg_reg:$shift)]>,
+ Sched<[WriteCMPsr, ReadALU]> {
bits<4> Rn;
bits<12> shift;
let Inst{25} = 0;
@@ -1326,7 +1383,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1338,7 +1396,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1353,7 +1412,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
(ins GPR:$Rn, so_reg_imm:$shift),
DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1369,7 +1429,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPRnopc:$Rd, CPSR,
(opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1392,7 +1453,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> imm;
@@ -1403,7 +1465,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
}
def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]> {
+ [/* pattern left blank */]>,
+ Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -1416,7 +1479,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift),
DPSoRegImmFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsi, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1430,7 +1494,8 @@ multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> {
def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
[(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn, CPSR))]>,
- Requires<[IsARM]> {
+ Requires<[IsARM]>,
+ Sched<[WriteALUsr, ReadALUsr]> {
bits<4> Rd;
bits<4> Rn;
bits<12> shift;
@@ -1641,11 +1706,11 @@ def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
NoItinerary, []>;
}
-def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
+def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary,
"hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
- bits<8> imm;
- let Inst{27-8} = 0b00110010000011110000;
- let Inst{7-0} = imm;
+ bits<3> imm;
+ let Inst{27-3} = 0b0011001000001111000000000;
+ let Inst{2-0} = imm;
}
def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>;
@@ -1842,7 +1907,8 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in
// the instruction. The {24-21} opcode bits are set by the fixup, as we don't
// know until then which form of the instruction will be used.
def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
- MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []> {
+ MiscFrm, IIC_iALUi, "adr", "\t$Rd, $label", []>,
+ Sched<[WriteALU, ReadALU]> {
bits<4> Rd;
bits<14> label;
let Inst{27-25} = 0b001;
@@ -2049,7 +2115,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
// Secure Monitor Call is a system instruction.
def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
- []> {
+ []>, Requires<[IsARM, HasTrustZone]> {
bits<4> opt;
let Inst{23-4} = 0b01100000000000000111;
let Inst{3-0} = opt;
@@ -2210,7 +2276,7 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
multiclass AI2_ldridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
+ (ins addrmode_imm12_pre:$addr), IndexModePre, LdFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2247,6 +2313,7 @@ multiclass AI2_ldridx<bit isByte, string opc,
let Inst{23} = offset{12};
let Inst{19-16} = addr;
let Inst{11-0} = offset{11-0};
+ let Inst{4} = 0;
let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
@@ -2279,7 +2346,7 @@ defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
def _PRE : AI3ldstidx<op, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode3:$addr), IndexModePre,
+ (ins addrmode3_pre:$addr), IndexModePre,
LdMiscFrm, itin,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<14> addr;
@@ -2313,7 +2380,7 @@ defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>;
defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>;
let hasExtraDefRegAllocReq = 1 in {
def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb),
- (ins addrmode3:$addr), IndexModePre,
+ (ins addrmode3_pre:$addr), IndexModePre,
LdMiscFrm, IIC_iLoad_d_ru,
"ldrd", "\t$Rt, $Rt2, $addr!",
"$addr.base = $Rn_wb", []> {
@@ -2469,7 +2536,7 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
multiclass AI2_stridx<bit isByte, string opc,
InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
+ (ins GPR:$Rt, addrmode_imm12_pre:$addr), IndexModePre,
StFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
@@ -2591,7 +2658,7 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb),
def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, addrmode3:$addr), IndexModePre,
+ (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre,
StMiscFrm, IIC_iStore_bh_ru,
"strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<14> addr;
@@ -2623,7 +2690,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb),
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb),
- (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr),
+ (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr),
IndexModePre, StMiscFrm, IIC_iStore_d_ru,
"strd", "\t$Rt, $Rt2, $addr!",
"$addr.base = $Rn_wb", []> {
@@ -3866,28 +3933,33 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "clz", "\t$Rd, $Rm",
- [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>,
+ Sched<[WriteALU]>;
def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rbit", "\t$Rd, $Rm",
[(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
- Requires<[IsARM, HasV6T2]>;
+ Requires<[IsARM, HasV6T2]>,
+ Sched<[WriteALU]>;
def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev", "\t$Rd, $Rm",
- [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
let AddedComplexity = 5 in
def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev16", "\t$Rd, $Rm",
[(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "revsh", "\t$Rd, $Rm",
[(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALU]>;
def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
(and (srl GPR:$Rm, (i32 8)), 0xFF)),
@@ -3899,7 +3971,8 @@ def PKHBT : APKHI<0b01101000, 0, (outs GPRnopc:$Rd),
[(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF),
(and (shl GPRnopc:$Rm, pkh_lsl_amt:$sh),
0xFFFF0000)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
// Alternate cases for PKHBT where identities eliminate some nodes.
def : ARMV6Pat<(or (and GPRnopc:$Rn, 0xFFFF), (and GPRnopc:$Rm, 0xFFFF0000)),
@@ -3915,7 +3988,8 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd),
[(set GPRnopc:$Rd, (or (and GPRnopc:$Rn, 0xFFFF0000),
(and (sra GPRnopc:$Rm, pkh_asr_amt:$sh),
0xFFFF)))]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]>,
+ Sched<[WriteALUsi, ReadALU]>;
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
@@ -4391,7 +4465,7 @@ multiclass LdStCop<bit load, bit Dbit, string asm> {
let Inst{7-0} = addr{7-0};
let DecoderMethod = "DecodeCopMemInstruction";
}
- def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ def _PRE : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
asm, "\t$cop, $CRd, $addr!", IndexModePre> {
bits<13> addr;
bits<4> cop;
@@ -4462,7 +4536,7 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm> {
let Inst{7-0} = addr{7-0};
let DecoderMethod = "DecodeCopMemInstruction";
}
- def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
+ def _PRE : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5_pre:$addr),
asm, "\t$cop, $CRd, $addr!", IndexModePre> {
bits<13> addr;
bits<4> cop;
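The HINT change above narrows the immediate to the architecturally defined hints 0-4 and fixes Inst{27-3}. As a rough illustration of what that encoding works out to in ARM mode, here is a standalone C++ sketch assuming the AL condition; encodeARMHint is a hypothetical helper, not the MC encoder:

    #include <cassert>
    #include <cstdint>

    // Composes the ARM-mode HINT word from the def above: cond = AL,
    // Inst{27-3} = 0b0011001000001111000000000, Inst{2-0} = imm.
    static uint32_t encodeARMHint(uint32_t Imm) {
      assert(Imm <= 4 && "hint immediate must be in [0,4]");
      return 0xE0000000u   // cond = AL
           | 0x0320F000u   // fixed bits from Inst{27-3}
           | (Imm & 0x7u); // Inst{2-0} = imm
    }

    int main() {
      assert(encodeARMHint(0) == 0xE320F000u); // nop
      assert(encodeARMHint(2) == 0xE320F002u); // wfe
      return 0;
    }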
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 0411ac4..896fd0f 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -4316,6 +4316,24 @@ def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
+ (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
+ (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
+ (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
+ (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+
// Vector Bitwise Operations.
def vnotd : PatFrag<(ops node:$in),
@@ -4889,6 +4907,29 @@ def VABSfq : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
"vabs", "f32",
v4f32, v4f32, fabs>;
+def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
+ (v2i32 (bitconvert (v8i8 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 7))))))),
+ (VABSv8i8 DPR:$src)>;
+def : Pat<(xor (v2i32 (bitconvert (v4i16 (NEONvshrs DPR:$src, (i32 15))))),
+ (v2i32 (bitconvert (v4i16 (add DPR:$src,
+ (NEONvshrs DPR:$src, (i32 15))))))),
+ (VABSv4i16 DPR:$src)>;
+def : Pat<(xor (v2i32 (NEONvshrs DPR:$src, (i32 31))),
+ (v2i32 (add DPR:$src, (NEONvshrs DPR:$src, (i32 31))))),
+ (VABSv2i32 DPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v16i8 (NEONvshrs QPR:$src, (i32 7))))),
+ (v4i32 (bitconvert (v16i8 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 7))))))),
+ (VABSv16i8 QPR:$src)>;
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (NEONvshrs QPR:$src, (i32 15))))),
+ (v4i32 (bitconvert (v8i16 (add QPR:$src,
+ (NEONvshrs QPR:$src, (i32 15))))))),
+ (VABSv8i16 QPR:$src)>;
+def : Pat<(xor (v4i32 (NEONvshrs QPR:$src, (i32 31))),
+ (v4i32 (add QPR:$src, (NEONvshrs QPR:$src, (i32 31))))),
+ (VABSv4i32 QPR:$src)>;
+
def : Pat<(v2f32 (int_arm_neon_vabs (v2f32 DPR:$src))), (VABSfd DPR:$src)>;
def : Pat<(v4f32 (int_arm_neon_vabs (v4f32 QPR:$src))), (VABSfq QPR:$src)>;
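The new VABS patterns above match the branchless integer absolute-value idiom per vector lane: an arithmetic shift by (lane width - 1) produces an all-ones or all-zero mask, and (x + mask) ^ mask yields |x|. A scalar C++ sketch of the same identity for one 32-bit lane:

    #include <cassert>
    #include <cstdint>

    // abs(x) == (x + (x >> 31)) ^ (x >> 31), with an arithmetic right shift;
    // this is the xor/add/NEONvshrs shape the patterns above recognize.
    static int32_t absViaShift(int32_t X) {
      int32_t Mask = X >> 31; // all ones if X < 0, zero otherwise
      return (X + Mask) ^ Mask;
    }

    int main() {
      assert(absViaShift(-5) == 5);
      assert(absViaShift(7) == 7);
      return 0;
    }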
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c9d709e..4dacb86 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -150,7 +150,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
def t2addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
- let PrintMethod = "printAddrModeImm12Operand";
+ let PrintMethod = "printAddrModeImm12Operand<false>";
let EncoderMethod = "getAddrModeImm12OpValue";
let DecoderMethod = "DecodeT2AddrModeImm12";
let ParserMatchClass = t2addrmode_imm12_asmoperand;
@@ -3401,12 +3401,7 @@ class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary,
bits<5> mode;
bit M;
- let Inst{31-27} = 0b11110;
- let Inst{26} = 0;
- let Inst{25-20} = 0b111010;
- let Inst{19-16} = 0b1111;
- let Inst{15-14} = 0b10;
- let Inst{12} = 0;
+ let Inst{31-11} = 0b111100111010111110000;
let Inst{10-9} = imod;
let Inst{8} = M;
let Inst{7-5} = iflags;
@@ -3425,13 +3420,13 @@ let imod = 0, iflags = 0, M = 1 in
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_255:$imm), NoItinerary, "hint", "\t$imm",[]>{
- bits<8> imm;
- let Inst{31-8} = 0b111100111010111110000000;
- let Inst{7-0} = imm;
+def t2HINT : T2I<(outs), (ins imm0_4:$imm), NoItinerary, "hint", "\t$imm",[]> {
+ bits<3> imm;
+ let Inst{31-3} = 0b11110011101011111000000000000;
+ let Inst{2-0} = imm;
}
-def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_255:$imm, pred:$p)>;
+def : t2InstAlias<"hint$p.w $imm", (t2HINT imm0_4:$imm, pred:$p)>;
def : t2InstAlias<"nop$p.w", (t2HINT 0, pred:$p)>;
def : t2InstAlias<"yield$p.w", (t2HINT 1, pred:$p)>;
def : t2InstAlias<"wfe$p.w", (t2HINT 2, pred:$p)>;
@@ -3449,7 +3444,8 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []> {
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+ []>, Requires<[IsThumb2, HasTrustZone]> {
let Inst{31-27} = 0b11110;
let Inst{26-20} = 0b1111111;
let Inst{15-12} = 0b1000;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 98bd6c1..c8ed576 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -865,7 +865,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
- if (isLd && MI->getOperand(0).getReg() == Base)
+ if (MI->getOperand(0).getReg() == Base)
return false;
unsigned PredReg = 0;
@@ -1258,6 +1258,22 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// merge the ldr's so far, including this one. But don't try to
// combine the following ldr(s).
Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
+
+ // Watch out for:
+ // r4 := ldr [r0, #8]
+ // r4 := ldr [r0, #4]
+ //
+      // The optimization may reorder the second ldr in front of the first
+      // ldr, which would violate the write-after-write (WAW) dependence. The
+      // same applies to str; only try to merge the inst(s) already in MemOps.
+ bool Overlap = false;
+ for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
+ if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
+ Overlap = true;
+ break;
+ }
+ }
+
if (CurrBase == 0 && !Clobber) {
// Start of a new chain.
CurrBase = Base;
@@ -1268,7 +1284,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
++NumMemOps;
Advance = true;
- } else {
+ } else if (!Overlap) {
if (Clobber) {
TryMerge = true;
Advance = true;
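The overlap scan added above keeps a load or store out of the current merge chain when its transfer register aliases one already queued, since an LDM/STM merge could otherwise reorder the two accesses. A self-contained sketch of the check, with regsOverlap reduced to plain equality (the real TargetRegisterInfo query also covers sub- and super-registers):

    #include <vector>

    // Simplified stand-in for TargetRegisterInfo::regsOverlap.
    static bool regsOverlap(unsigned A, unsigned B) { return A == B; }

    // True if Reg would form a WAW hazard with any transfer register already
    // queued, mirroring the MemOps loop in LoadStoreMultipleOpti above.
    static bool hasWAWHazard(unsigned Reg, const std::vector<unsigned> &Queued) {
      for (unsigned R : Queued)
        if (regsOverlap(Reg, R))
          return true;
      return false;
    }

    int main() {
      std::vector<unsigned> Queued = {4, 5}; // r4, r5 already in the chain
      return hasWAWHazard(4, Queued) ? 0 : 1; // second write to r4: hazard
    }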
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 88d96c0..f4248fc 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -38,7 +38,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// VarArgsRegSaveSize - Size of the register save area for vararg functions.
///
- unsigned VarArgsRegSaveSize;
+ unsigned ArgRegsSaveSize;
/// HasStackFrame - True if this function has a stack frame. Set by
/// processFunctionBeforeCalleeSavedScan().
@@ -117,7 +117,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -129,7 +129,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
+ ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -141,8 +141,8 @@ public:
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
- unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; }
- void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; }
+ unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
+ void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 02196d0..2d088de 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -6,6 +6,77 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Instruction scheduling annotations for out-of-order CPUs.
+// These annotations are independent of the itinerary classes defined below.
+// Here we define the subtarget independent read/write per-operand resources.
+// The subtarget schedule definitions will then map these to the subtarget's
+// resource usages.
+// For example:
+// The instruction cycle timings table might contain an entry for an operation
+// like the following:
+// Rd <- ADD Rn, Rm, <shift> Rs
+// Uops | Latency from register | Uops - resource requirements - latency
+//  2   | Rn: 1 Rm: 4 Rs: 4     | uop T0, Rm, Rs  - P01 - 3
+//      |                       | uopc Rd, Rn, T0 - P01 - 1
+// This is telling us that the result will be available in destination register
+// Rd a minimum of three cycles after the results in Rm and Rs are available
+// and one cycle after the result in Rn is available. The micro-ops can execute
+// on resource P01.
+// To model this, we need to express that we need to dispatch two micro-ops,
+// that the resource P01 is needed and that the latency to Rn is different than
+// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
+// two.
+// We will do this by assigning (abstract) resources to register defs/uses.
+// ARMSchedule.td:
+// def WriteALUsr : SchedWrite;
+// def ReadAdvanceALUsr : SchedRead;
+//
+// ARMInstrInfo.td:
+// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
+// ReadDefault]> { ...}
+// ReadAdvance read resources allow us to define "pipeline by-passes" or
+// shorter latencies to certain registers as needed in the example above.
+// The "ReadDefault" can be omitted.
+// Next, the subtarget td file assigns resources to the abstract resources
+// defined here.
+// ARMScheduleSubtarget.td:
+// // Resources.
+// def P01 : ProcResource<3>; // ALU unit (3 of them).
+// ...
+// // Resource usages.
+// def : WriteRes<WriteALUsr, [P01, P01]> {
+// Latency = 4; // Latency of 4.
+// NumMicroOps = 2; // Dispatch 2 micro-ops.
+// // The two instances of resource P01 are occupied for one cycle. It is one
+// // cycle because these resources happen to be pipelined.
+// ResourceCycles = [1, 1];
+// }
+// def : ReadAdvance<ReadAdvanceALUsr, 3>;
+
+// Basic ALU operation.
+def WriteALU : SchedWrite;
+def ReadALU : SchedRead;
+
+// Basic ALU with shifts.
+def WriteALUsi : SchedWrite; // Shift by immediate.
+def WriteALUsr : SchedWrite; // Shift by register.
+def WriteALUSsr : SchedWrite; // Shift by register (flag setting).
+def ReadALUsr : SchedRead; // Some operands are read later.
+
+// Compares.
+def WriteCMP : SchedWrite;
+def WriteCMPsi : SchedWrite;
+def WriteCMPsr : SchedWrite;
+
+// Define TII for use in SchedVariant Predicates.
+def : PredicateProlog<[{
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(MI)}]>;
//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 4191931..9739ed2 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1898,6 +1898,8 @@ def CortexA9Model : SchedMachineModel {
//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.
+let SchedModel = CortexA9Model in {
+
def A9UnitALU : ProcResource<2>;
def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
def A9UnitAGU : ProcResource<1>;
@@ -1918,11 +1920,11 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>;
def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
// Basic ALU.
-def A9WriteA : SchedWriteRes<[A9UnitALU]>;
+def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
// ALU with operand shifted by immediate.
-def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
+def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
// ALU with operand shifted by register.
-def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
+def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
// Multiplication
def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
@@ -2003,13 +2005,6 @@ foreach NumCycles = 2-8 in {
def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
} // foreach NumCycles
-// Define TII for use in SchedVariant Predicates.
-def : PredicateProlog<[{
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
- (void)TII;
-}]>;
-
// Define address generation sequences and predicates for 8 flavors of LDMs.
foreach NumAddr = 1-8 in {
@@ -2254,11 +2249,11 @@ def A9WriteLMfp : SchedWriteVariant<[
// These mov immediate writers are unconditionally expanded with
// additive latency.
def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
-def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>;
+def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
// Some ALU operations can read loaded integer values one cycle early.
-def A9ReadA : SchedReadAdvance<1,
+def A9ReadALU : SchedReadAdvance<1,
[A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
@@ -2279,26 +2274,25 @@ def A9Read4 : SchedReadAdvance<3>;
// This table follows the ARM Cortex-A9 Technical Reference Manuals,
// mostly in order.
-let SchedModel = CortexA9Model in {
def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
IIC_iMVNi,IIC_iMVNsi,
IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>;
-def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>;
+def :ItinRW<[A9WriteI,A9ReadALU],[IIC_iMVNr]>;
def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>;
def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
-def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
-def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
-def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>;
-def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
-def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>;
-def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB
-def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
-def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>;
+def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
+def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
+def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>;
+def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
+def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
+def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB
+def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
+def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
// A9WriteHi ignored for MUL32.
def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32,
@@ -2371,7 +2365,7 @@ def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu,
IIC_iStore_m,
IIC_iStore_mu]>;
def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>;
-def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>;
+def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
@@ -2486,4 +2480,17 @@ def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
+
+// Map SchedRWs that are identical for cortexa9 to existing resources.
+def : SchedAlias<WriteALU, A9WriteALU>;
+def : SchedAlias<WriteALUsr, A9WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A9WriteALUsr>;
+def : SchedAlias<ReadALU, A9ReadALU>;
+def : SchedAlias<ReadALUsr, A9ReadALU>;
+// FIXME: need to special-case AND, ORR, EOR, BIC because they don't
+// read-advance, but our InstrInfo claims they do.
+
+def : SchedAlias<WriteCMP, A9WriteALU>;
+def : SchedAlias<WriteCMPsi, A9WriteALU>;
+def : SchedAlias<WriteCMPsr, A9WriteALU>;
} // SchedModel = CortexA9Model
diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td
index e9bc3e0..7c6df41 100644
--- a/lib/Target/ARM/ARMScheduleSwift.td
+++ b/lib/Target/ARM/ARMScheduleSwift.td
@@ -1078,8 +1078,67 @@ def SwiftModel : SchedMachineModel {
let IssueWidth = 3; // 3 micro-ops are dispatched per cycle.
let MinLatency = 0; // Data dependencies are allowed within dispatch groups.
let LoadLatency = 3;
+ let MispredictPenalty = 14; // A branch direction mispredict.
let Itineraries = SwiftItineraries;
}
-// TODO: Add Swift processor and scheduler resources.
+// Swift predicates.
+def IsFastImmShiftSwiftPred : SchedPredicate<[{TII->isSwiftFastImmShift(MI)}]>;
+
+// Swift resource mapping.
+let SchedModel = SwiftModel in {
+ // Processor resources.
+ def SwiftUnitP01 : ProcResource<2>; // ALU unit.
+ def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit.
+ def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit.
+ def SwiftUnitP2 : ProcResource<1>; // LS unit.
+ def SwiftUnitDiv : ProcResource<1>;
+
+ // Generic resource requirements.
+ def SwiftWriteP01TwoCycle : SchedWriteRes<[SwiftUnitP01]> { let Latency = 2; }
+ def SwiftWriteP01ThreeCycleTwoUops :
+ SchedWriteRes<[SwiftUnitP01, SwiftUnitP01]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ }
+ def SwiftWriteP0ThreeCycleThreeUops : SchedWriteRes<[SwiftUnitP0]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+ }
+
+ // 4.2.4 Arithmetic and Logical.
+  // ALU operation, register-shifted-by-immediate variant.
+ def SwiftWriteALUsi : SchedWriteVariant<[
+ // lsl #2, lsl #1, or lsr #1.
+ SchedVar<IsFastImmShiftSwiftPred, [SwiftWriteP01TwoCycle]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+ ]>;
+ def SwiftWriteALUsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP01ThreeCycleTwoUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftWriteALUSsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [SwiftWriteP0ThreeCycleThreeUops]>,
+ SchedVar<NoSchedPred, [SwiftWriteP01TwoCycle]>
+ ]>;
+ def SwiftReadAdvanceALUsr : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [SchedReadAdvance<2>]>,
+ SchedVar<NoSchedPred, [NoReadAdvance]>
+ ]>;
+ // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR
+ // AND,BIC,EOR,ORN,ORR
+ // CLZ,RBIT,REV,REV16,REVSH,PKH
+ def : WriteRes<WriteALU, [SwiftUnitP01]>;
+ def : SchedAlias<WriteALUsi, SwiftWriteALUsi>;
+ def : SchedAlias<WriteALUsr, SwiftWriteALUsr>;
+ def : SchedAlias<WriteALUSsr, SwiftWriteALUSsr>;
+ def : ReadAdvance<ReadALU, 0>;
+ def : SchedAlias<ReadALUsr, SwiftReadAdvanceALUsr>;
+
+ // 4.2.5 Integer comparison
+ def : WriteRes<WriteCMP, [SwiftUnitP01]>;
+ def : WriteRes<WriteCMPsi, [SwiftUnitP01]>;
+ def : WriteRes<WriteCMPsr, [SwiftUnitP01]>;
+}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index f4d568c..3b8e56f 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
+ const std::string &FS, const TargetOptions &Options)
: ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
, stackAlignment(4)
, CPUString(CPU)
, TargetTriple(TT)
+ , Options(Options)
, TargetABI(ARM_ABI_APCS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
@@ -89,9 +91,11 @@ void ARMSubtarget::initializeEnvironment() {
HasRAS = false;
HasMPExtension = false;
FPOnlySP = false;
+ HasTrustZone = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
UseNaClTrap = false;
+ UnsafeFPMath = false;
}
void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
@@ -162,6 +166,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// configuration.
if (!StrictAlign && hasV6Ops() && isTargetDarwin())
AllowsUnalignedMem = true;
+
+ // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default.
+ uint64_t Bits = getFeatureBits();
+ if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters
+ (Options.UnsafeFPMath || isTargetDarwin()))
+ UseNEONForSinglePrecisionFP = true;
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
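The new UseNEONForSinglePrecisionFP logic only fires on Cortex-A5/A8, and only when non-IEEE results are acceptable, either because the user enabled unsafe FP math or because the target is Darwin. A truth-table sketch of the condition (useNEONForSPFP is a hypothetical helper):

    // Mirrors the check in resetSubtargetFeatures above.
    static bool useNEONForSPFP(bool IsA5, bool IsA8, bool UnsafeFPMath,
                               bool IsDarwin) {
      return (IsA5 || IsA8) && (UnsafeFPMath || IsDarwin);
    }

    int main() {
      bool A8Darwin = useNEONForSPFP(false, true, false, true);   // enabled
      bool A9Strict = useNEONForSPFP(false, false, false, false); // untouched
      return (A8Darwin && !A9Strict) ? 0 : 1;
    }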
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8ce22e1..038eb76 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,6 +26,7 @@
namespace llvm {
class GlobalValue;
class StringRef;
+class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
@@ -147,6 +148,9 @@ protected:
/// precision.
bool FPOnlySP;
+ /// HasTrustZone - if true, processor supports TrustZone security extensions
+ bool HasTrustZone;
+
/// AllowsUnalignedMem - If true, the subtarget allows unaligned memory
/// accesses for some types. For details, see
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
@@ -159,6 +163,9 @@ protected:
/// NaCl TRAP instruction is generated instead of the regular TRAP.
bool UseNaClTrap;
+  /// Whether the target machine allows unsafe FP math (such as use of NEON FP)
+ bool UnsafeFPMath;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -175,6 +182,9 @@ protected:
/// Selected instruction itineraries (one entry per itinerary class.)
InstrItineraryData InstrItins;
+ /// Options passed via command line that could influence the target
+ const TargetOptions &Options;
+
public:
enum {
isELF, isDarwin
@@ -189,7 +199,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetOptions &Options);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -244,6 +254,7 @@ public:
bool hasVMLxForwarding() const { return HasVMLxForwarding; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
+ bool hasTrustZone() const { return HasTrustZone; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 3003760..42c7d2c 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -48,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
+ Subtarget(TT, CPU, FS, Options),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
@@ -185,8 +185,7 @@ bool ARMPassConfig::addPreSched2() {
addPass(createARMLoadStoreOptimizationPass());
printAndVerify("After ARM load / store optimizer");
}
- if ((DisableA15SDOptimization || !getARMSubtarget().isCortexA15()) &&
- getARMSubtarget().hasNEON())
+ if (getARMSubtarget().hasNEON())
addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 140a8db..53ece66 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -125,6 +125,10 @@ public:
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const;
unsigned getAddressComputationCost(Type *Val) const;
+
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info = OK_AnyValue,
+ OperandValueKind Op2Info = OK_AnyValue) const;
/// @}
};
@@ -211,13 +215,21 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
- // Operations that we legalize using load/stores to the stack.
- { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 16*2 + 4*4 },
- { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 16*2 + 4*3 },
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 8*2 + 2*4 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 8*2 + 2*3 },
- { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 },
- { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 },
+ // The number of vmovl instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
+
+ // Operations that we legalize using splitting.
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
@@ -448,3 +460,67 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
return LT.first * NEONShuffleTbl[Idx].Cost;
}
+
+unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                        OperandValueKind Op1Info,
+                                        OperandValueKind Op2Info) const {
+
+ int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ const unsigned FunctionCallDivCost = 20;
+ const unsigned ReciprocalDivCost = 10;
+ static const CostTblEntry<MVT> CostTbl[] = {
+ // Division.
+ // These costs are somewhat random. Choose a cost of 20 to indicate that
+    // vectorizing division (added function call) is going to be very expensive.
+ // Double registers types.
+ { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
+ { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
+ { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
+ // Quad register types.
+ { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
+ { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
+ // Multiplication.
+ };
+
+ int Idx = -1;
+
+ if (ST->hasNEON())
+ Idx = CostTableLookup<MVT>(CostTbl, array_lengthof(CostTbl), ISDOpcode,
+ LT.second);
+
+ if (Idx != -1)
+ return LT.first * CostTbl[Idx].Cost;
+
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
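The entries above are scaled by LT.first, the number of legal-register pieces type legalization splits the vector into; an SDIV on v16i8, for instance, costs 16 * 20 = 320, which steers the vectorizer well away from emitting sixteen libcalls. A sketch of the table lookup itself (CostEntry and costTableLookup are simplified stand-ins for the CostTblEntry/CostTableLookup helpers):

    #include <cstddef>

    struct CostEntry { int ISD; int Ty; unsigned Cost; };

    // Linear scan: the first (opcode, legalized type) match wins; -1 means
    // fall back to the generic TargetTransformInfo cost.
    static int costTableLookup(const CostEntry *Tbl, size_t Len, int ISD,
                               int Ty) {
      for (size_t I = 0; I != Len; ++I)
        if (Tbl[I].ISD == ISD && Tbl[I].Ty == Ty)
          return (int)I;
      return -1;
    }

    int main() {
      static const CostEntry Tbl[] = {{/*SDIV*/ 1, /*v16i8*/ 7, 16 * 20}};
      return costTableLookup(Tbl, 1, 1, 7) == 0 ? 0 : 1;
    }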
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index c897efd..114cc9e 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -610,6 +610,13 @@ public:
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
+ bool isImm0_4() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 5;
+ }
bool isImm0_1020s4() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -4593,20 +4600,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- if (!tryParseRegisterWithWriteBack(Operands))
- return false;
- int Res = tryParseShiftRegister(Operands);
- if (Res == 0) // success
- return false;
- else if (Res == -1) // irrecoverable error
- return true;
- // If this is VMRS, check for the apsr_nzcv operand.
- if (Mnemonic == "vmrs" &&
- Parser.getTok().getString().equals_lower("apsr_nzcv")) {
- S = Parser.getTok().getLoc();
- Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
- return false;
+      // If we've seen a branch mnemonic, the next operand must be a label. This
+      // is true even if the label is a register name. So "b r1" means branch to
+      // the label "r1".
+ bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl";
+ if (!ExpectLabel) {
+ if (!tryParseRegisterWithWriteBack(Operands))
+ return false;
+ int Res = tryParseShiftRegister(Operands);
+ if (Res == 0) // success
+ return false;
+ else if (Res == -1) // irrecoverable error
+ return true;
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
+ S = Parser.getTok().getLoc();
+ Parser.Lex();
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
+ return false;
+ }
}
    // Fall through for the Identifier case that is not a register or a
@@ -4739,6 +4752,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mls" || Mnemonic == "smmls" || Mnemonic == "vcls" ||
Mnemonic == "vmls" || Mnemonic == "vnmls" || Mnemonic == "vacge" ||
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
+ Mnemonic == "vaclt" || Mnemonic == "vacle" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
@@ -5008,8 +5022,8 @@ static bool isDataTypeToken(StringRef Tok) {
static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
}
-
-static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
+ unsigned VariantID);
/// Parse an arm instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc,
@@ -5020,7 +5034,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// MatchInstructionImpl(), but that's too late for aliases that include
// any sort of suffix.
unsigned AvailableFeatures = getAvailableFeatures();
- applyMnemonicAliases(Name, AvailableFeatures);
+ unsigned AssemblerDialect = getParser().getAssemblerDialect();
+ applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect);
// First check for the ARM-specific .req directive.
if (Parser.getTok().is(AsmToken::Identifier) &&
@@ -7607,6 +7622,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires ARMv6 or later");
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
+ case Match_ImmRange0_4: {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ return Error(ErrorLoc, "immediate operand must be in the range [0,4]");
+ }
case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 31a3b0b..ac937f3 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -308,6 +308,8 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
@@ -1951,10 +1953,12 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::CreateImm(mode));
if (iflags) S = MCDisassembler::SoftFail;
} else {
- // imod == '00' && M == '0' --> UNPREDICTABLE
- Inst.setOpcode(ARM::t2CPS1p);
- Inst.addOperand(MCOperand::CreateImm(mode));
- S = MCDisassembler::SoftFail;
+ // imod == '00' && M == '0' --> this is a HINT instruction
+ int imm = fieldFromInstruction(Insn, 0, 8);
+      // HINT is defined only for immediates in [0..4]
+      if (imm > 4) return MCDisassembler::Fail;
+ Inst.setOpcode(ARM::t2HINT);
+ Inst.addOperand(MCOperand::CreateImm(imm));
}
return S;
@@ -1996,9 +2000,10 @@ static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
imm |= (fieldFromInstruction(Insn, 16, 4) << 12);
if (Inst.getOpcode() == ARM::MOVTi16)
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
if (!tryAddingSymbolicOperand(Address, imm, false, 4, Inst, Decoder))
@@ -3049,9 +3054,9 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4,
+ if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4,
true, 2, Inst, Decoder))
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ Inst.addOperand(MCOperand::CreateImm(Val << 1));
return MCDisassembler::Success;
}
@@ -3278,7 +3283,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder)))
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)))
return MCDisassembler::Fail;
if (load) {
@@ -3570,7 +3575,7 @@ static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
unsigned Rn = fieldFromInstruction(Insn, 16, 4);
unsigned pred = fieldFromInstruction(Insn, 28, 4);
- if (!Check(S, DecoderGPRRegisterClass(Inst, Rd, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
if ((Rt & 1) || Rt == 0xE || Rn == 0xF) return MCDisassembler::Fail;
@@ -4496,6 +4501,15 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeImm0_4(MCInst &Inst, unsigned Insn, uint64_t Address,
+                                 const void *Decoder) {
+ unsigned Imm = fieldFromInstruction(Insn, 0, 3);
+ if (Imm > 4) return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
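DecodeImm0_4 above pulls a 3-bit field out of the instruction word and rejects values above 4, so the undefined hint encodings 5-7 fail to decode instead of printing. The bit extraction it leans on can be sketched as follows (fieldFrom is a simplified stand-in for fieldFromInstruction; NumBits < 32 assumed):

    #include <cstdint>

    // Extract NumBits bits of Insn starting at bit Start.
    static uint32_t fieldFrom(uint32_t Insn, unsigned Start, unsigned NumBits) {
      return (Insn >> Start) & ((1u << NumBits) - 1u);
    }

    int main() {
      // hint #2 (wfe) in ARM mode: the low three bits decode back to 2.
      return fieldFrom(0xE320F002u, 0, 3) == 2 ? 0 : 1;
    }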
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2afb20d..3bcd083 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -490,7 +490,8 @@ void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ raw_ostream &O,
+ bool AlwaysPrintImm0) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op+1);
const MCOperand &MO3 = MI->getOperand(Op+2);
@@ -509,7 +510,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
- if (ImmOffs || (op == ARM_AM::sub)) {
+ if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
O << ", "
<< markup("<imm:")
<< "#"
@@ -520,6 +521,7 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
O << ']' << markup(">");
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
@@ -535,7 +537,7 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
printAM3PostIndexOp(MI, Op, O);
return;
}
- printAM3PreOrOffsetIndexOp(MI, Op, O);
+ printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0);
}
void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
@@ -593,6 +595,7 @@ void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
O << ARM_AM::getAMSubModeStr(Mode);
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -608,7 +611,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
unsigned Op = ARM_AM::getAM5Op(MO2.getImm());
- if (ImmOffs || Op == ARM_AM::sub) {
+ if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
O << ", "
<< markup("<imm:")
<< "#"
@@ -1022,6 +1025,7 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
@@ -1042,13 +1046,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
OffImm = 0;
if (isSub) {
O << ", "
- << markup("<imm:")
+ << markup("<imm:")
<< "#-" << -OffImm
<< markup(">");
}
- else if (OffImm > 0) {
+ else if (AlwaysPrintImm0 || OffImm > 0) {
O << ", "
- << markup("<imm:")
+ << markup("<imm:")
<< "#" << OffImm
<< markup(">");
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index edff75d..344104e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -47,12 +47,13 @@ public:
raw_ostream &O);
void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
-
+ template <bool AlwaysPrintImm0>
void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,raw_ostream &O);
+ void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
+ bool AlwaysPrintImm0);
void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -60,6 +61,7 @@ public:
raw_ostream &O);
void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
@@ -91,6 +93,7 @@ public:
raw_ostream &O);
void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template<bool AlwaysPrintImm0>
void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/LICENSE.TXT b/lib/Target/ARM/LICENSE.TXT
index 68afea1..68afea1 100755..100644
--- a/lib/Target/ARM/LICENSE.TXT
+++ b/lib/Target/ARM/LICENSE.TXT
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 418971d..6c3d247 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -13,7 +13,9 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMRegisterInfo.h"
#include "ARMUnwindOp.h"
+#include "ARMUnwindOpAsm.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -26,6 +28,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
@@ -33,11 +36,15 @@
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static std::string GetAEABIUnwindPersonalityName(unsigned Index) {
+ assert(Index < NUM_PERSONALITY_INDEX && "Invalid personality index");
+ return (Twine("__aeabi_unwind_cpp_pr") + Twine(Index)).str();
+}
+
namespace {
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -57,8 +64,9 @@ public:
ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(SK_ARMELFStreamer, Context, TAB, OS, Emitter),
- IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None), ExTab(0),
- FnStart(0), Personality(0), CantUnwind(false) {}
+ IsThumb(IsThumb), MappingSymbolCounter(0), LastEMS(EMS_None) {
+ Reset();
+ }
~ARMELFStreamer() {}
@@ -75,14 +83,15 @@ public:
virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector);
- virtual void ChangeSection(const MCSection *Section) {
+ virtual void ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, the default-constructed
// value that DenseMap::lookup returns for keys that are not present.
- LastMappingSymbols[getPreviousSection()] = LastEMS;
+ LastMappingSymbols[getPreviousSection().first] = LastEMS;
LastEMS = LastMappingSymbols.lookup(Section);
- MCELFStreamer::ChangeSection(Section);
+ MCELFStreamer::ChangeSection(Section, Subsection);
}
/// This function is the one used to emit instruction data into the ELF
@@ -175,7 +184,7 @@ private:
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- Symbol->setSection(*getCurrentSection());
+ Symbol->setSection(*getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
@@ -194,6 +203,7 @@ private:
void Reset();
void EmitPersonalityFixup(StringRef Name);
+ void CollectUnwindOpcodes();
void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
SectionKind Kind, const MCSymbol &Fn);
@@ -210,9 +220,16 @@ private:
MCSymbol *ExTab;
MCSymbol *FnStart;
const MCSymbol *Personality;
+ uint32_t VFPRegSave; // Register mask for {d31-d0}
+ uint32_t RegSave; // Register mask for {r15-r0}
+ int64_t SPOffset;
+ uint16_t FPReg;
+ int64_t FPOffset;
+ bool UsedFP;
bool CantUnwind;
+ UnwindOpcodeAssembler UnwindOpAsm;
};
-}
+} // end anonymous namespace
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
@@ -238,7 +255,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
} else {
EHSection = getContext().getELFSection(EHSecName, Type, Flags, Kind);
}
- assert(EHSection);
+ assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
SwitchSection(EHSection);
@@ -262,10 +279,20 @@ inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) {
}
void ARMELFStreamer::Reset() {
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
ExTab = NULL;
FnStart = NULL;
Personality = NULL;
+ VFPRegSave = 0;
+ RegSave = 0;
+ FPReg = MRI.getEncodingValue(ARM::SP);
+ FPOffset = 0;
+ SPOffset = 0;
+ UsedFP = false;
CantUnwind = false;
+
+ UnwindOpAsm.Reset();
}
// Add the R_ARM_NONE fixup at the same position
@@ -284,6 +311,18 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
MCFixup::getKindForSize(4, false)));
}
+void ARMELFStreamer::CollectUnwindOpcodes() {
+ if (UsedFP) {
+ UnwindOpAsm.EmitSetFP(FPReg);
+ UnwindOpAsm.EmitSPOffset(-FPOffset);
+ } else {
+ UnwindOpAsm.EmitSPOffset(SPOffset);
+ }
+ UnwindOpAsm.EmitVFPRegSave(VFPRegSave);
+ UnwindOpAsm.EmitRegSave(RegSave);
+ UnwindOpAsm.Finalize();
+}
+
void ARMELFStreamer::EmitFnStart() {
assert(FnStart == 0);
FnStart = getContext().CreateTempSymbol();
@@ -294,35 +333,29 @@ void ARMELFStreamer::EmitFnEnd() {
assert(FnStart && ".fnstart must precede .fnend");
// Emit unwind opcodes if there is no .handlerdata directive
- int PersonalityIndex = -1;
if (!ExTab && !CantUnwind) {
- // For __aeabi_unwind_cpp_pr1, we have to emit opcodes in .ARM.extab.
- SwitchToExTabSection(*FnStart);
-
- // Create .ARM.extab label for offset in .ARM.exidx
- ExTab = getContext().CreateTempSymbol();
- EmitLabel(ExTab);
-
- PersonalityIndex = 1;
-
- uint32_t Entry = 0;
- uint32_t NumExtraEntryWords = 0;
- Entry |= NumExtraEntryWords << 24;
- Entry |= (EHT_COMPACT | PersonalityIndex) << 16;
-
- // TODO: This should be generated according to .save, .vsave, .setfp
- // directives. Currently, we are simply generating FINISH opcode.
- Entry |= UNWIND_OPCODE_FINISH << 8;
- Entry |= UNWIND_OPCODE_FINISH;
-
- EmitIntValue(Entry, 4, 0);
+ CollectUnwindOpcodes();
+
+ unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+ if (PersonalityIndex == AEABI_UNWIND_CPP_PR1 ||
+ PersonalityIndex == AEABI_UNWIND_CPP_PR2) {
+ // For __aeabi_unwind_cpp_pr1 and __aeabi_unwind_cpp_pr2, we have to
+ // emit the unwind opcodes in the corresponding ".ARM.extab" section, and
+ // then emit a reference to these unwind opcodes in the second word of
+ // the exception index table entry.
+ SwitchToExTabSection(*FnStart);
+ ExTab = getContext().CreateTempSymbol();
+ EmitLabel(ExTab);
+ EmitBytes(UnwindOpAsm.data(), 0);
+ }
}
// Emit the exception index table entry
SwitchToExIdxSection(*FnStart);
- if (PersonalityIndex == 1)
- EmitPersonalityFixup("__aeabi_unwind_cpp_pr1");
+ unsigned PersonalityIndex = UnwindOpAsm.getPersonalityIndex();
+ if (PersonalityIndex < NUM_PERSONALITY_INDEX)
+ EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
const MCSymbolRefExpr *FnStartRef =
MCSymbolRefExpr::Create(FnStart,
@@ -333,12 +366,22 @@ void ARMELFStreamer::EmitFnEnd() {
if (CantUnwind) {
EmitIntValue(EXIDX_CANTUNWIND, 4, 0);
- } else {
+ } else if (ExTab) {
+ // Emit a reference to the unwind opcodes in the ".ARM.extab" section.
const MCSymbolRefExpr *ExTabEntryRef =
MCSymbolRefExpr::Create(ExTab,
MCSymbolRefExpr::VK_ARM_PREL31,
getContext());
EmitValue(ExTabEntryRef, 4, 0);
+ } else {
+ // For __aeabi_unwind_cpp_pr0, we have to emit the unwind opcodes in
+ // the second word of the exception index table entry. The size of the
+ // unwind opcodes should always be 4 bytes.
+ assert(PersonalityIndex == AEABI_UNWIND_CPP_PR0 &&
+ "Compact model must use __aeabi_unwind_cpp_pr0 as personality");
+ assert(UnwindOpAsm.size() == 4u &&
+ "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4");
+ EmitBytes(UnwindOpAsm.data(), 0);
}
// Clean exception handling frame information
@@ -368,36 +411,54 @@ void ARMELFStreamer::EmitHandlerData() {
EmitValue(PersonalityRef, 4, 0);
// Emit unwind opcodes
- uint32_t Entry = 0;
- uint32_t NumExtraEntryWords = 0;
-
- // TODO: This should be generated according to .save, .vsave, .setfp
- // directives. Currently, we are simply generating FINISH opcode.
- Entry |= NumExtraEntryWords << 24;
- Entry |= UNWIND_OPCODE_FINISH << 16;
- Entry |= UNWIND_OPCODE_FINISH << 8;
- Entry |= UNWIND_OPCODE_FINISH;
-
- EmitIntValue(Entry, 4, 0);
+ CollectUnwindOpcodes();
+ EmitBytes(UnwindOpAsm.data(), 0);
}
void ARMELFStreamer::EmitPersonality(const MCSymbol *Per) {
Personality = Per;
+ UnwindOpAsm.setPersonality(Per);
}
-void ARMELFStreamer::EmitSetFP(unsigned NewFpReg,
- unsigned NewSpReg,
+void ARMELFStreamer::EmitSetFP(unsigned NewFPReg,
+ unsigned NewSPReg,
int64_t Offset) {
- // TODO: Not implemented
+ assert(SPOffset == 0 &&
+ "Current implementation assumes .setfp precedes .pad");
+
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+ uint16_t NewFPRegEncVal = MRI.getEncodingValue(NewFPReg);
+#ifndef NDEBUG
+ uint16_t NewSPRegEncVal = MRI.getEncodingValue(NewSPReg);
+#endif
+
+ assert((NewSPReg == ARM::SP || NewSPRegEncVal == FPReg) &&
+ "the operand of .setfp directive should be either $sp or $fp");
+
+ UsedFP = true;
+ FPReg = NewFPRegEncVal;
+ FPOffset = Offset;
}
void ARMELFStreamer::EmitPad(int64_t Offset) {
- // TODO: Not implemented
+ SPOffset += Offset;
}
void ARMELFStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool IsVector) {
- // TODO: Not implemented
+ const MCRegisterInfo &MRI = getContext().getRegisterInfo();
+
+#ifndef NDEBUG
+ unsigned Max = IsVector ? 32 : 16;
+#endif
+ uint32_t &RegMask = IsVector ? VFPRegSave : RegSave;
+
+ for (size_t i = 0; i < RegList.size(); ++i) {
+ unsigned Reg = MRI.getEncodingValue(RegList[i]);
+ assert(Reg < Max && "Register encoded value out of range");
+ RegMask |= 1u << Reg;
+ }
}
namespace llvm {
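CollectUnwindOpcodes above replays the prologue directives in unwind order: first the VSP adjustment (or the .setfp pair), then the VFP pops, then the core-register pops. A minimal sketch of what that means for a frame built by "push {r4, lr}; vpush {d8}; sub sp, #16", i.e. the directives .save {r4, lr} / .vsave {d8} / .pad #16 with no .setfp; the bit numbers are the MRI encoding values used by the streamer:

    void replayExample(UnwindOpcodeAssembler &UOA) {
      UOA.EmitSPOffset(16);                    // undo "sub sp, #16" first
      UOA.EmitVFPRegSave(1u << 8);             // then "vpop {d8}"
      UOA.EmitRegSave((1u << 4) | (1u << 14)); // finally "pop {r4, lr}"
      UOA.Finalize();
    }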
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
index dad5576..fa4add6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h
@@ -107,6 +107,19 @@ namespace llvm {
UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D8 = 0xd0
};
+ /// ARM-defined Personality Routine Index
+ enum ARMPersonalityRoutineIndex {
+ // To make the exception handling table more compact, ARM EHABI defines
+ // several predefined personality routines. There are currently 3
+ // personality routines in ARM EHABI, and the encoding allows for at
+ // most 16 predefined personality routines.
+ AEABI_UNWIND_CPP_PR0 = 0,
+ AEABI_UNWIND_CPP_PR1 = 1,
+ AEABI_UNWIND_CPP_PR2 = 2,
+
+ NUM_PERSONALITY_INDEX
+ };
+
}
#endif // ARM_UNWIND_OP_H
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
new file mode 100644
index 0000000..191db69
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -0,0 +1,198 @@
+//===-- ARMUnwindOpAsm.cpp - ARM Unwind Opcodes Assembler -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the unwind opcode assembler for the ARM exception
+// handling table.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMUnwindOpAsm.h"
+
+#include "ARMUnwindOp.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+
+using namespace llvm;
+
+void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
+ if (RegSave == 0u)
+ return;
+
+ // One-byte opcode to save registers r14 and r11-r4
+ if (RegSave & (1u << 4)) {
+ // The one-byte opcode always saves r4, so we can't use it when r4 is
+ // not listed in the .save directive.
+
+ // Compute the consecutive registers from r4 to r11.
+ uint32_t Range = 0;
+ uint32_t Mask = (1u << 4);
+ for (uint32_t Bit = (1u << 5); Bit < (1u << 12); Bit <<= 1) {
+ if ((RegSave & Bit) == 0u)
+ break;
+ ++Range;
+ Mask |= Bit;
+ }
+
+ // Emit this opcode only when the mask covers all of the registers.
+ uint32_t UnmaskedReg = RegSave & 0xfff0u & (~Mask);
+ if (UnmaskedReg == 0u) {
+ // Pop r[4 : (4 + n)]
+ Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4 | Range);
+ RegSave &= 0x000fu;
+ } else if (UnmaskedReg == (1u << 14)) {
+ // Pop r[14] + r[4 : (4 + n)]
+ Ops.push_back(UNWIND_OPCODE_POP_REG_RANGE_R4_R14 | Range);
+ RegSave &= 0x000fu;
+ }
+ }
+
+ // Two-byte opcode to save registers r15-r4
+ if ((RegSave & 0xfff0u) != 0) {
+ uint32_t Op = UNWIND_OPCODE_POP_REG_MASK_R4 | (RegSave >> 4);
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+
+ // Opcode to save registers r3-r0
+ if ((RegSave & 0x000fu) != 0) {
+ uint32_t Op = UNWIND_OPCODE_POP_REG_MASK | (RegSave & 0x000fu);
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+}
+
+/// Emit unwind opcodes for .vsave directives
+void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
+ size_t i = 32;
+
+ while (i > 16) {
+ uint32_t Bit = 1u << (i - 1);
+ if ((VFPRegSave & Bit) == 0u) {
+ --i;
+ continue;
+ }
+
+ uint32_t Range = 0;
+
+ --i;
+ Bit >>= 1;
+
+ while (i > 16 && (VFPRegSave & Bit)) {
+ --i;
+ ++Range;
+ Bit >>= 1;
+ }
+
+ uint32_t Op =
+ UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD_D16 | ((i - 16) << 4) | Range;
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+
+ while (i > 0) {
+ uint32_t Bit = 1u << (i - 1);
+ if ((VFPRegSave & Bit) == 0u) {
+ --i;
+ continue;
+ }
+
+ uint32_t Range = 0;
+
+ --i;
+ Bit >>= 1;
+
+ while (i > 0 && (VFPRegSave & Bit)) {
+ --i;
+ ++Range;
+ Bit >>= 1;
+ }
+
+ uint32_t Op = UNWIND_OPCODE_POP_VFP_REG_RANGE_FSTMFDD | (i << 4) | Range;
+ Ops.push_back(static_cast<uint8_t>(Op >> 8));
+ Ops.push_back(static_cast<uint8_t>(Op & 0xff));
+ }
+}
+
+/// Emit unwind opcodes for .setfp directives
+void UnwindOpcodeAssembler::EmitSetFP(uint16_t FPReg) {
+ Ops.push_back(UNWIND_OPCODE_SET_VSP | FPReg);
+}
+
+/// Emit unwind opcodes to update stack pointer
+void UnwindOpcodeAssembler::EmitSPOffset(int64_t Offset) {
+ if (Offset > 0x200) {
+ uint8_t Buff[10];
+ size_t Size = encodeULEB128((Offset - 0x204) >> 2, Buff);
+ Ops.push_back(UNWIND_OPCODE_INC_VSP_ULEB128);
+ Ops.append(Buff, Buff + Size);
+ } else if (Offset > 0) {
+ if (Offset > 0x100) {
+ Ops.push_back(UNWIND_OPCODE_INC_VSP | 0x3fu);
+ Offset -= 0x100;
+ }
+ Ops.push_back(UNWIND_OPCODE_INC_VSP |
+ static_cast<uint8_t>((Offset - 4) >> 2));
+ } else if (Offset < 0) {
+ while (Offset < -0x100) {
+ Ops.push_back(UNWIND_OPCODE_DEC_VSP | 0x3fu);
+ Offset += 0x100;
+ }
+ Ops.push_back(UNWIND_OPCODE_DEC_VSP |
+ static_cast<uint8_t>(((-Offset) - 4) >> 2));
+ }
+}
+
+void UnwindOpcodeAssembler::AddOpcodeSizePrefix(size_t Pos) {
+ size_t SizeInWords = (size() + 3) / 4;
+ assert(SizeInWords <= 0x100u &&
+ "Only 256 additional words are allowed for unwind opcodes");
+ Ops[Pos] = static_cast<uint8_t>(SizeInWords - 1);
+}
+
+void UnwindOpcodeAssembler::AddPersonalityIndexPrefix(size_t Pos, unsigned PI) {
+ assert(PI < NUM_PERSONALITY_INDEX && "Invalid personality prefix");
+ Ops[Pos] = EHT_COMPACT | PI;
+}
+
+void UnwindOpcodeAssembler::EmitFinishOpcodes() {
+ for (size_t i = (0x4u - (size() & 0x3u)) & 0x3u; i > 0; --i)
+ Ops.push_back(UNWIND_OPCODE_FINISH);
+}
+
+void UnwindOpcodeAssembler::Finalize() {
+ if (HasPersonality) {
+ // Personality specified by .personality directive
+ Offset = 1;
+ AddOpcodeSizePrefix(1);
+ } else {
+ if (getOpcodeSize() <= 3) {
+ // __aeabi_unwind_cpp_pr0: [ 0x80 , OP1 , OP2 , OP3 ]
+ Offset = 1;
+ PersonalityIndex = AEABI_UNWIND_CPP_PR0;
+ AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+ } else {
+ // __aeabi_unwind_cpp_pr1: [ 0x81 , SIZE , OP1 , OP2 , ... ]
+ Offset = 0;
+ PersonalityIndex = AEABI_UNWIND_CPP_PR1;
+ AddPersonalityIndexPrefix(Offset, PersonalityIndex);
+ AddOpcodeSizePrefix(1);
+ }
+ }
+
+ // Emit padding finish opcodes if size() is not a multiple of 4.
+ EmitFinishOpcodes();
+
+ // Swap the byte order
+ uint8_t *Ptr = Ops.begin() + Offset;
+ assert(size() % 4 == 0 && "Final unwind opcodes should align to 4");
+ for (size_t i = 0, n = size(); i < n; i += 4) {
+ std::swap(Ptr[i], Ptr[i + 3]);
+ std::swap(Ptr[i + 1], Ptr[i + 2]);
+ }
+}
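A worked example of the EmitSPOffset encodings above, using the AEABI opcode values from ARMUnwindOp.h (UNWIND_OPCODE_INC_VSP = 0x00, UNWIND_OPCODE_DEC_VSP = 0x40, UNWIND_OPCODE_INC_VSP_ULEB128 = 0xb2); the helper below is a standalone sketch, not part of the patch:

    // Offset = 16  -> 0x00 | ((16 - 4) >> 2)           = 0x03 (vsp += 16)
    // Offset = 260 -> 0x3f (vsp += 256), then 0x00     (vsp += 4)
    // Offset = 520 -> 0xb2, ULEB128((520 - 0x204) >> 2) = 0xb2 0x01
    #include <cassert>
    #include <cstdint>

    // Single-byte form used above for remainders in (0, 0x100]:
    static uint8_t encodeIncVSP(int64_t Offset) {
      assert(Offset > 0 && Offset <= 0x100 && (Offset & 3) == 0);
      return 0x00 /* UNWIND_OPCODE_INC_VSP */ |
             static_cast<uint8_t>((Offset - 4) >> 2);
    }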
diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
new file mode 100644
index 0000000..f6ecaeb
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h
@@ -0,0 +1,114 @@
+//===-- ARMUnwindOpAsm.h - ARM Unwind Opcodes Assembler ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the unwind opcode assembler for the ARM exception
+// handling table.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_UNWIND_OP_ASM_H
+#define ARM_UNWIND_OP_ASM_H
+
+#include "ARMUnwindOp.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+class UnwindOpcodeAssembler {
+private:
+ llvm::SmallVector<uint8_t, 8> Ops;
+
+ unsigned Offset;
+ unsigned PersonalityIndex;
+ bool HasPersonality;
+
+ enum {
+ // The number of bytes to be preserved for the size and personality index
+ // prefix of unwind opcodes.
+ NUM_PRESERVED_PREFIX_BUF = 2
+ };
+
+public:
+ UnwindOpcodeAssembler()
+ : Ops(NUM_PRESERVED_PREFIX_BUF), Offset(NUM_PRESERVED_PREFIX_BUF),
+ PersonalityIndex(NUM_PERSONALITY_INDEX), HasPersonality(false) {
+ }
+
+ /// Reset the unwind opcode assembler.
+ void Reset() {
+ Ops.resize(NUM_PRESERVED_PREFIX_BUF);
+ Offset = NUM_PRESERVED_PREFIX_BUF;
+ PersonalityIndex = NUM_PERSONALITY_INDEX;
+ HasPersonality = false;
+ }
+
+ /// Get the size of the payload (including the size byte)
+ size_t size() const {
+ return Ops.size() - Offset;
+ }
+
+ /// Get the beginning of the payload
+ const uint8_t *begin() const {
+ return Ops.begin() + Offset;
+ }
+
+ /// Get the payload
+ StringRef data() const {
+ return StringRef(reinterpret_cast<const char *>(begin()), size());
+ }
+
+ /// Set the personality routine (from a .personality directive)
+ void setPersonality(const MCSymbol *Per) {
+ HasPersonality = true;
+ }
+
+ /// Get the personality index
+ unsigned getPersonalityIndex() const {
+ return PersonalityIndex;
+ }
+
+ /// Emit unwind opcodes for .save directives
+ void EmitRegSave(uint32_t RegSave);
+
+ /// Emit unwind opcodes for .vsave directives
+ void EmitVFPRegSave(uint32_t VFPRegSave);
+
+ /// Emit unwind opcodes for .setfp directives
+ void EmitSetFP(uint16_t FPReg);
+
+ /// Emit unwind opcodes to update stack pointer
+ void EmitSPOffset(int64_t Offset);
+
+ /// Finalize the unwind opcode sequence for EmitBytes()
+ void Finalize();
+
+private:
+ /// Get the size of the opcodes in bytes.
+ size_t getOpcodeSize() const {
+ return Ops.size() - NUM_PRESERVED_PREFIX_BUF;
+ }
+
+ /// Add the length prefix to the payload
+ void AddOpcodeSizePrefix(size_t Pos);
+
+ /// Add personality index prefix in some compact format
+ void AddPersonalityIndexPrefix(size_t Pos, unsigned PersonalityIndex);
+
+ /// Pad the sequence with finish opcodes so its size is a multiple of 4
+ void EmitFinishOpcodes();
+};
+
+} // namespace llvm
+
+#endif // ARM_UNWIND_OP_ASM_H
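A minimal sketch of the intended lifecycle of this class, as ARMELFStreamer drives it for one function (values illustrative; assumes the two headers above are included):

    void emitExampleUnwindInfo() {
      UnwindOpcodeAssembler UOA;
      UOA.Reset();                              // fresh sequence per function
      UOA.EmitSPOffset(8);                      // from ".pad #8"
      UOA.EmitRegSave((1u << 4) | (1u << 14));  // from ".save {r4, lr}"
      UOA.Finalize();                           // pick pr0/pr1, pad, byte-swap
      if (UOA.getPersonalityIndex() == AEABI_UNWIND_CPP_PR0) {
        // The 4 bytes of UOA.data() go inline in the .ARM.exidx entry.
      }
    }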
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index e17eb4d..a7ac5ca 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMARMDesc
ARMMCTargetDesc.cpp
ARMMachObjectWriter.cpp
ARMELFObjectWriter.cpp
+ ARMUnwindOpAsm.cpp
)
add_dependencies(LLVMARMDesc ARMCommonTableGen)
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index 463c440..a64707e 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -173,7 +173,6 @@ GCC is doing a couple of clever things here:
mov r1, #1
lsl r1, r1, #8
tst r2, r1
-
//===---------------------------------------------------------------------===//
@@ -196,7 +195,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack
objects are referenced off the frame pointer with negative offsets. See
oggenc for an example.
-
//===---------------------------------------------------------------------===//
Poor codegen test/CodeGen/ARM/select.ll f7:
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 2c3388c..1e2a8b0 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -88,7 +88,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -104,8 +104,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
- if (VARegSaveSize)
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize,
+ if (ArgRegsSaveSize)
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
if (!AFI->hasStackFrame()) {
@@ -249,7 +249,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
const Thumb1InstrInfo &TII =
*static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo());
- unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+ unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
int NumBytes = (int)MFI->getStackSize();
const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -300,7 +300,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
- if (VARegSaveSize) {
+ if (ArgRegsSaveSize) {
// Unlike T2 and ARM mode, the T1 pop instruction cannot restore
// to LR, and we can't pop the value directly to the PC since
// we need to update the SP after popping the value. Therefore, we
@@ -313,7 +313,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
.addReg(ARM::R3, RegState::Define);
- emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
+ emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
@@ -376,7 +376,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI->getDebugLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 609d502..7452fb7 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -588,7 +588,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// means the stack pointer cannot be used to access the emergency spill slot
// when !hasReservedCallFrame().
#ifndef NDEBUG
- if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)) {
assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 67e8ec7..a1b48c2 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
@@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ // Thumb2 STRD expects its source registers to be in rGPR. Not a problem
+ // for gsub_0, but gsub_1 needs an extra constraint (it could otherwise
+ // be sp).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
+ AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
+ AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+ return;
+ }
+
ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
}
@@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
RC == &ARM::GPRnopcRegClass) {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
return;
}
+ if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
+ // Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
+ // gsub_0, but needs an extra constraint for gsub_1 (which could be sp
+ // otherwise).
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
+ AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
+ AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
+ MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ AddDefaultPred(MIB);
+
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ return;
+ }
+
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
@@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = -Offset;
isSub = true;
}
+ } else if (AddrMode == ARMII::AddrModeT2_i8s4) {
+ Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
+ NumBits = 8;
+ // MCInst operand has already scaled value.
+ Scale = 1;
+ if (Offset < 0) {
+ isSub = true;
+ Offset = -Offset;
+ }
} else {
llvm_unreachable("Unsupported addressing mode!");
}
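The new AddrModeT2_i8s4 case covers the t2LDRDi8/t2STRDi8 spills added in this patch: the offset field is 8 bits with an implicit scale of 4, so only 4-byte-aligned offsets with magnitude up to 1020 can be folded. A standalone sketch of that range check (assumes an unscaled byte offset; the sign is carried separately by isSub):

    static bool fitsT2i8s4(int64_t Offset) {
      if (Offset < 0)
        Offset = -Offset;                   // sign handled via isSub
      return (Offset & 3) == 0 && Offset <= 255 * 4;
    }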
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 567bc05..4795aae 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -15,6 +15,7 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -79,11 +80,8 @@ namespace {
{ ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
{ ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
{ ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
- // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
- // likely to cause issue in the loop. As a size / performance workaround,
- // they are not marked as such.
- { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0,0 },
- { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1,0 },
+ { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
+ { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
// FIXME: Do we need the 16-bit 'S' variant?
{ ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
{ ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
@@ -149,8 +147,7 @@ namespace {
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
DenseMap<unsigned, unsigned> ReduceOpcodeMap;
- bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
- bool IsSelfLoop);
+ bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred,
@@ -160,33 +157,46 @@ namespace {
const ReduceEntry &Entry);
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry, bool LiveCPSR,
- MachineInstr *CPSRDef, bool IsSelfLoop);
+ const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
/// instruction.
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
+ const ReduceEntry &Entry, bool LiveCPSR,
bool IsSelfLoop);
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
/// non-two-address instruction.
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
- const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
+ const ReduceEntry &Entry, bool LiveCPSR,
bool IsSelfLoop);
/// ReduceMI - Attempt to reduce MI, return true on success.
bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop);
+ bool LiveCPSR, bool IsSelfLoop);
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
bool OptimizeSize;
bool MinimizeSize;
+
+ // Last instruction to define CPSR in the current block.
+ MachineInstr *CPSRDef;
+ // Was CPSR last defined by a high latency instruction?
+ // When CPSRDef is null, this refers to CPSR defs in predecessors.
+ bool HighLatencyCPSR;
+
+ struct MBBInfo {
+ // The flags leaving this block have high latency.
+ bool HighLatencyCPSR;
+ // Has this block been visited yet?
+ bool Visited;
+
+ MBBInfo() : HighLatencyCPSR(false), Visited(false) {}
+ };
+
+ SmallVector<MBBInfo, 8> BlockInfo;
};
char Thumb2SizeReduce::ID = 0;
}
@@ -207,6 +217,16 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
return false;
}
+// Check for a likely high-latency flag def.
+static bool isHighLatencyCPSR(MachineInstr *Def) {
+ switch (Def->getOpcode()) {
+ case ARM::FMSTAT:
+ case ARM::tMUL:
+ return true;
+ }
+ return false;
+}
+
/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
/// the 's' forms of 16-bit instructions partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR setting
@@ -225,20 +245,19 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
/// In this case it would have been OK to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w
bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
- bool FirstInSelfLoop) {
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
// Disable the check for -Oz (aka OptimizeForSizeHarder).
if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
return false;
- if (!Def)
+ if (!CPSRDef)
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
- return FirstInSelfLoop;
+ return HighLatencyCPSR || FirstInSelfLoop;
SmallSet<unsigned, 2> Defs;
- for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = Def->getOperand(i);
+ for (unsigned i = 0, e = CPSRDef->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = CPSRDef->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -256,6 +275,16 @@ Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
return false;
}
+ // If the current CPSR has high latency, try to avoid the false dependency.
+ if (HighLatencyCPSR)
+ return true;
+
+ // tMOVi8 usually doesn't start long dependency chains, and there are a lot
+ // of them, so always shrink them when CPSR doesn't have high latency.
+ if (Use->getOpcode() == ARM::t2MOVi ||
+ Use->getOpcode() == ARM::t2MOVi16)
+ return false;
+
// No read-after-write dependency. The narrowing will add false dependency.
return true;
}
@@ -498,16 +527,15 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::t2ADDri) {
// If the source register is SP, try to reduce to tADDrSPi, otherwise
// it's a normal reduce.
if (MI->getOperand(1).getReg() != ARM::SP) {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
// Try to reduce to tADDrSPi.
unsigned Imm = MI->getOperand(2).getImm();
@@ -557,12 +585,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::t2ADDSri: {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
// fallthrough
}
case ARM::t2ADDSrr:
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
}
break;
@@ -574,13 +602,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2UXTB:
case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0)
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
break;
case ARM::t2MOVi16:
// Can convert only 'pure' immediate operands, not immediates obtained as
// globals' addresses.
if (MI->getOperand(1).isImm())
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
break;
case ARM::t2CMPrr: {
// Try to reduce to the lo-reg only version first. Why there are two
@@ -590,9 +618,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
{ ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
- if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
}
}
return false;
@@ -601,8 +629,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
@@ -683,7 +710,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on a partial flag update by some 16-bit
// instructions which have the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+ canAddPseudoFlagDep(MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
@@ -720,8 +747,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
@@ -780,7 +806,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on a partial flag update by some 16-bit
// instructions which have the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
+ canAddPseudoFlagDep(MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
@@ -865,8 +891,7 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
}
bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
- bool LiveCPSR, MachineInstr *CPSRDef,
- bool IsSelfLoop) {
+ bool LiveCPSR, bool IsSelfLoop) {
unsigned Opcode = MI->getOpcode();
DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
if (OPI == ReduceOpcodeMap.end())
@@ -875,16 +900,16 @@ bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
// Don't attempt normal reductions on "special" cases for now.
if (Entry.Special)
- return ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
+ return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
// Try to transform to a 16-bit two-address instruction.
if (Entry.NarrowOpc2 &&
- ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
// Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 &&
- ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
return false;
@@ -895,9 +920,25 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
- MachineInstr *CPSRDef = 0;
MachineInstr *BundleMI = 0;
+ CPSRDef = 0;
+ HighLatencyCPSR = false;
+
+ // Check predecessors for the latest CPSRDef.
+ for (MachineBasicBlock::pred_iterator
+ I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
+ const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+ if (!PInfo.Visited) {
+ // Since blocks are visited in RPO, this must be a back-edge.
+ continue;
+ }
+ if (PInfo.HighLatencyCPSR) {
+ HighLatencyCPSR = true;
+ break;
+ }
+ }
+
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
bool IsSelfLoop = MBB.isSuccessor(&MBB);
@@ -911,13 +952,15 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
BundleMI = MI;
continue;
}
+ if (MI->isDebugValue())
+ continue;
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
// Does NextMII belong to the same bundle as MI?
bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();
- if (ReduceMI(MBB, MI, LiveCPSR, CPSRDef, IsSelfLoop)) {
+ if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop)) {
Modified = true;
MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
@@ -944,14 +987,19 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
+ HighLatencyCPSR = false;
IsSelfLoop = false;
} else if (DefCPSR) {
// This is the last CPSR defining instruction.
CPSRDef = MI;
+ HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
IsSelfLoop = false;
}
}
+ MBBInfo &Info = BlockInfo[MBB.getNumber()];
+ Info.HighLatencyCPSR = HighLatencyCPSR;
+ Info.Visited = true;
return Modified;
}
@@ -967,9 +1015,16 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
MinimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::MinSize);
+ BlockInfo.clear();
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ // Visit blocks in reverse post-order so the CPSR-def state is known for
+ // all predecessors.
+ ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
bool Modified = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Modified |= ReduceMBB(*I);
+ for (ReversePostOrderTraversal<MachineFunction*>::rpo_iterator
+ I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
+ Modified |= ReduceMBB(**I);
return Modified;
}