aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoey Gouly <joey.gouly@arm.com>2013-09-09 14:21:49 +0000
committerJoey Gouly <joey.gouly@arm.com>2013-09-09 14:21:49 +0000
commitb57d99694b87326a2eea26d76becf67bf5784b49 (patch)
tree9cec38b5b6631727d01203aa4743d43d8024603b
parent7b80d9233a0a69c47d1e5ebe647951349ed166e8 (diff)
downloadexternal_llvm-b57d99694b87326a2eea26d76becf67bf5784b49.zip
external_llvm-b57d99694b87326a2eea26d76becf67bf5784b49.tar.gz
external_llvm-b57d99694b87326a2eea26d76becf67bf5784b49.tar.bz2
[ARMv8] Prevent generation of deprecated IT blocks on ARMv8 in Thumb mode.
IT blocks can only be one instruction lonf, and can only contain a subset of the 16 instructions. Patch by Artyom Skrobov! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190309 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/IfConversion.cpp39
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp82
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp6
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp7
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp62
-rw-r--r--test/CodeGen/ARM/2013-05-05-IfConvertBug.ll22
-rw-r--r--test/CodeGen/ARM/arm-and-tst-peephole.ll23
-rw-r--r--test/CodeGen/ARM/fast-isel-select.ll1
-rw-r--r--test/CodeGen/ARM/indirectbr.ll4
-rw-r--r--test/CodeGen/ARM/thumb2-it-block.ll5
-rw-r--r--test/CodeGen/Thumb2/v8_IT_1.ll19
-rw-r--r--test/CodeGen/Thumb2/v8_IT_2.ll37
-rw-r--r--test/CodeGen/Thumb2/v8_IT_3.ll75
-rw-r--r--test/CodeGen/Thumb2/v8_IT_4.ll43
-rw-r--r--test/CodeGen/Thumb2/v8_IT_5.ll62
15 files changed, 428 insertions, 59 deletions
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1ae7e3b..418994d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -666,32 +666,29 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
bool isPredicated = TII->isPredicated(I);
bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
- if (!isCondBr) {
- if (!isPredicated) {
- BBI.NonPredSize++;
- unsigned ExtraPredCost = 0;
- unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
- &ExtraPredCost);
- if (NumCycles > 1)
- BBI.ExtraCost += NumCycles-1;
- BBI.ExtraCost2 += ExtraPredCost;
- } else if (!AlreadyPredicated) {
- // FIXME: This instruction is already predicated before the
- // if-conversion pass. It's probably something like a conditional move.
- // Mark this block unpredicable for now.
- BBI.IsUnpredicable = true;
- return;
- }
+ // A conditional branch is not predicable, but it may be eliminated.
+ if (isCondBr)
+ continue;
+
+ if (!isPredicated) {
+ BBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
+ // FIXME: This instruction is already predicated before the
+ // if-conversion pass. It's probably something like a conditional move.
+ // Mark this block unpredicable for now.
+ BBI.IsUnpredicable = true;
+ return;
}
if (BBI.ClobbersPred && !isPredicated) {
// Predicate modification instruction should end the block (except for
// already predicated instructions and end of block branches).
- if (isCondBr) {
- // A conditional branch is not predicable, but it may be eliminated.
- continue;
- }
-
// Predicate may have been modified, the subsequent (currently)
// unpredicated instructions cannot be correctly predicated.
BBI.IsUnpredicable = true;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 62e8063..696d039 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -513,6 +513,74 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
return Found;
}
+static bool isV8EligibleForIT(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::tADC:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tADDrSPi:
+ case ARM::tADDrr:
+ case ARM::tAND:
+ case ARM::tASRri:
+ case ARM::tASRrr:
+ case ARM::tBIC:
+ case ARM::tCMNz:
+ case ARM::tCMPi8:
+ case ARM::tCMPr:
+ case ARM::tEOR:
+ case ARM::tLDRBi:
+ case ARM::tLDRBr:
+ case ARM::tLDRHi:
+ case ARM::tLDRHr:
+ case ARM::tLDRSB:
+ case ARM::tLDRSH:
+ case ARM::tLDRi:
+ case ARM::tLDRr:
+ case ARM::tLDRspi:
+ case ARM::tLSLri:
+ case ARM::tLSLrr:
+ case ARM::tLSRri:
+ case ARM::tLSRrr:
+ case ARM::tMOVi8:
+ case ARM::tMUL:
+ case ARM::tMVN:
+ case ARM::tORR:
+ case ARM::tROR:
+ case ARM::tRSB:
+ case ARM::tSBC:
+ case ARM::tSTRBi:
+ case ARM::tSTRBr:
+ case ARM::tSTRHi:
+ case ARM::tSTRHr:
+ case ARM::tSTRi:
+ case ARM::tSTRr:
+ case ARM::tSTRspi:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ case ARM::tSUBrr:
+ case ARM::tTST:
+ return true;
+// there are some "conditionally deprecated" opcodes
+ case ARM::tADDspr:
+ return MI->getOperand(2).getReg() != ARM::PC;
+ case ARM::tADDrSP:
+ case ARM::tBX:
+ case ARM::tBLXr:
+ // ADD PC, SP and BLX PC were always unpredictable,
+ // now on top of it they're deprecated
+ return MI->getOperand(0).getReg() != ARM::PC;
+ case ARM::tADDhirr:
+ return MI->getOperand(0).getReg() != ARM::PC &&
+ MI->getOperand(2).getReg() != ARM::PC;
+ case ARM::tCMPhir:
+ case ARM::tMOVr:
+ return MI->getOperand(0).getReg() != ARM::PC &&
+ MI->getOperand(1).getReg() != ARM::PC;
+ }
+}
+
/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
@@ -520,11 +588,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
if (!MI->isPredicable())
return false;
- if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
- ARMFunctionInfo *AFI =
- MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
- return AFI->isThumb2Function();
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+
+ if (AFI->isThumb2Function()) {
+ if (getSubtarget().hasV8Ops())
+ return isV8EligibleForIT(MI);
+ } else { // non-Thumb
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
+ return false;
}
+
return true;
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 3cbb9a8..f3a74c7 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -254,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
- // If we're a thumb2 or not NEON function we were handled via isPredicable.
+ // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
AFI->isThumb2Function())
- return false;
+ return MI->isPredicable();
for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
if (MCID.OpInfo[i].isPredicate())
@@ -278,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
// Do we use a predicate? or...
// Are we NEON in ARM mode and have a predicate operand? If so, I know
// we're not predicable but add it anyways.
- if (TII.isPredicable(MI) || isARMNEONPred(MI))
+ if (isARMNEONPred(MI))
AddDefaultPred(MIB);
// Do we optionally set a predicate? Preds is size > 0 iff the predicate
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 1ba78e4..c78db54 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -196,8 +196,13 @@ bool ARMPassConfig::addPreSched2() {
addPass(createARMExpandPseudoPass());
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only())
+ if (!getARMSubtarget().isThumb1Only()) {
+ // in v8, IfConversion depends on Thumb instruction widths
+ if (getARMSubtarget().hasV8Ops() &&
+ !getARMSubtarget().prefers32BitThumb())
+ addPass(createThumb2SizeReductionPass());
addPass(&IfConverterID);
+ }
}
if (getARMSubtarget().isThumb2())
addPass(createThumb2ITBlockPass());
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d8596d7..8f1ae2e 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -28,6 +28,7 @@ namespace {
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
+ bool hasV8Ops;
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
@@ -192,37 +193,41 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Form IT block.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
- // Branches, including tricky ones like LDM_RET, need to end an IT
- // block so check the instruction we just put in the block.
- for (; MBBI != E && Pos &&
- (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
- if (MBBI->isDebugValue())
- continue;
-
- MachineInstr *NMI = &*MBBI;
- MI = NMI;
-
- unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
- if (NCC == CC || NCC == OCC) {
- Mask |= (NCC & 1) << Pos;
- // Add implicit use of ITSTATE.
- NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
- true/*isImp*/, false/*isKill*/));
- LastITMI = NMI;
- } else {
- if (NCC == ARMCC::AL &&
- MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
- --MBBI;
- MBB.remove(NMI);
- MBB.insert(InsertPos, NMI);
- ++NumMovedInsts;
+
+ // v8 IT blocks are limited to one conditional op: skip the loop
+ if (!hasV8Ops) {
+ // Branches, including tricky ones like LDM_RET, need to end an IT
+ // block so check the instruction we just put in the block.
+ for (; MBBI != E && Pos &&
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
continue;
+
+ MachineInstr *NMI = &*MBBI;
+ MI = NMI;
+
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
+ Mask |= (NCC & 1) << Pos;
+ // Add implicit use of ITSTATE.
+ NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*ifDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
+ break;
}
- break;
+ TrackDefUses(NMI, Defs, Uses, TRI);
+ --Pos;
}
- TrackDefUses(NMI, Defs, Uses, TRI);
- --Pos;
}
// Finalize IT mask.
@@ -250,6 +255,7 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
TRI = TM.getRegisterInfo();
+ hasV8Ops = TM.getSubtarget<ARMSubtarget>().hasV8Ops();
if (!AFI->isThumbFunction())
return false;
diff --git a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
index 2eeebac..8bc8cb1 100644
--- a/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
+++ b/test/CodeGen/ARM/2013-05-05-IfConvertBug.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=CHECK-V8 %s
; rdar://13782395
define i32 @t1(i32 %a, i32 %b, i8** %retaddr) {
@@ -100,6 +101,27 @@ KBBlockZero.exit: ; preds = %bb2.i
; CHECK: [[LABEL]]:
; CHECK-NEXT: subs r0, r1, r0
; CHECK-NEXT: bx lr
+
+; CHECK-V8-LABEL: wrapDistance:
+; CHECK-V8: cmp r1, #59
+; CHECK-V8-NEXT: bgt
+; CHECK-V8-NEXT: %if.then
+; CHECK-V8-NEXT: subs r0, r2, #1
+; CHECK-V8-NEXT: bx lr
+; CHECK-V8-NEXT: %if.else
+; CHECK-V8-NEXT: subs [[REG:r[0-9]+]], #120
+; CHECK-V8-NEXT: cmp [[REG]], r1
+; CHECK-V8-NEXT: bge
+; CHECK-V8-NEXT: %if.else
+; CHECK-V8-NEXT: cmp r0, #119
+; CHECK-V8-NEXT: bgt
+; CHECK-V8-NEXT: %if.then4
+; CHECK-V8-NEXT: adds r0, r1, #1
+; CHECK-V8-NEXT: bx lr
+; CHECK-V8-NEXT: %if.end5
+; CHECK-V8-NEXT: subs r0, r1, r0
+; CHECK-V8-NEXT: bx lr
+
define i32 @wrapDistance(i32 %tx, i32 %sx, i32 %w) {
entry:
%cmp = icmp slt i32 %sx, 60
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 0762070..88d797e 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=arm | FileCheck -check-prefix=ARM %s
; RUN: llc < %s -march=thumb | FileCheck -check-prefix=THUMB %s
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck -check-prefix=T2 %s
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck -check-prefix=V8 %s
; FIXME: The -march=thumb test doesn't change if -disable-peephole is specified.
@@ -39,6 +40,17 @@ tailrecurse: ; preds = %sw.bb, %entry
br i1 %tst, label %sw.bb, label %tailrecurse.switch
tailrecurse.switch: ; preds = %tailrecurse
+; V8-LABEL: %tailrecurse.switch
+; V8: cmp
+; V8-NEXT: beq
+; V8-NEXT: %tailrecurse.switch
+; V8: cmp
+; V8-NEXT: beq
+; V8-NEXT: %tailrecurse.switch
+; V8: cmp
+; V8-NEXT: beq
+; V8-NEXT: b
+; The trailing space in the last line checks that the branch is unconditional
switch i32 %and, label %sw.epilog [
i32 1, label %sw.bb
i32 3, label %sw.bb6
@@ -73,6 +85,7 @@ sw.epilog: ; preds = %tailrecurse.switch
; ARM: bar
; THUMB: bar
; T2: bar
+; V8-LABEL: bar:
define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
entry:
%0 = getelementptr inbounds %struct.S* %x, i32 0, i32 1, i32 0
@@ -81,22 +94,32 @@ entry:
; ARM: ands
; THUMB: ands
; T2: ands
+; V8: ands
+; V8-NEXT: beq
%3 = and i32 %2, 112
%4 = icmp eq i32 %3, 0
br i1 %4, label %return, label %bb
bb: ; preds = %entry
+; V8-NEXT: %bb
%5 = getelementptr inbounds %struct.S* %y, i32 0, i32 1, i32 0
%6 = load i8* %5, align 1
%7 = zext i8 %6 to i32
; ARM: andsne
; THUMB: ands
; T2: andsne
+; V8: ands
+; V8-NEXT: beq
%8 = and i32 %7, 112
%9 = icmp eq i32 %8, 0
br i1 %9, label %return, label %bb2
bb2: ; preds = %bb
+; V8-NEXT: %bb2
+; V8-NEXT: cmp
+; V8-NEXT: it ne
+; V8-NEXT: cmpne
+; V8-NEXT: bne
%10 = icmp eq i32 %3, 16
%11 = icmp eq i32 %8, 16
%or.cond = or i1 %10, %11
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
index dbec1c6..40f8807 100644
--- a/test/CodeGen/ARM/fast-isel-select.ll
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv8-apple-ios | FileCheck %s --check-prefix=THUMB
define i32 @t1(i1 %c) nounwind readnone {
entry:
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 99e84a6..1aeeb91 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -relocation-model=pic -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=ARM
; RUN: llc < %s -relocation-model=pic -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=THUMB
; RUN: llc < %s -relocation-model=static -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB2
+; RUN: llc < %s -relocation-model=static -mtriple=thumbv8-apple-darwin | FileCheck %s -check-prefix=THUMB2
@nextaddr = global i8* null ; <i8**> [#uses=2]
@C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
@@ -48,14 +49,17 @@ L2: ; preds = %L3, %bb2
L1: ; preds = %L2, %bb2
%res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
+; ARM-LABEL: %L1
; ARM: ldr [[R1:r[0-9]+]], LCPI
; ARM: add [[R1b:r[0-9]+]], pc, [[R1]]
; ARM: str [[R1b]]
+; THUMB-LABEL: %L1
; THUMB: ldr
; THUMB: add
; THUMB: ldr [[R2:r[0-9]+]], LCPI
; THUMB: add [[R2]], pc
; THUMB: str [[R2]]
+; THUMB2-LABEL: %L1
; THUMB2: ldr [[R2:r[0-9]+]], LCPI
; THUMB2-NEXT: str{{(.w)?}} [[R2]]
store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index a25352c..47c5dcc 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -1,14 +1,15 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
; PR11107
define i32 @test(i32 %a, i32 %b) {
entry:
; CHECK: cmp
; CHECK-NEXT: it mi
-; CHECK-NEXT: rsbmi
+; CHECK-NEXT: rsb{{s?}}mi
; CHECK-NEXT: cmp
; CHECK-NEXT: it mi
-; CHECK-NEXT: rsbmi
+; CHECK-NEXT: rsb{{s?}}mi
%cmp1 = icmp slt i32 %a, 0
%sub1 = sub nsw i32 0, %a
%abs1 = select i1 %cmp1, i32 %sub1, i32 %a
diff --git a/test/CodeGen/Thumb2/v8_IT_1.ll b/test/CodeGen/Thumb2/v8_IT_1.ll
new file mode 100644
index 0000000..e33845d
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_1.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=thumbv8 -mattr=+neon | FileCheck %s
+
+;CHECK-LABEL: select_s_v_v:
+;CHECK: beq .LBB0_2
+;CHECK-NEXT: @ BB#1:
+;CHECK-NEXT: vmov.i32
+;CHECK-NEXT: .LBB0_2:
+;CHECK: bx
+define <16 x i8> @select_s_v_v(i32 %avail, i8* %bar) {
+entry:
+ %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %bar, i32 1)
+ %and = and i32 %avail, 1
+ %tobool = icmp eq i32 %and, 0
+ %vld1. = select i1 %tobool, <16 x i8> %vld1, <16 x i8> zeroinitializer
+ ret <16 x i8> %vld1.
+}
+
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* , i32 )
+
diff --git a/test/CodeGen/Thumb2/v8_IT_2.ll b/test/CodeGen/Thumb2/v8_IT_2.ll
new file mode 100644
index 0000000..fe88316
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_2.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
+
+ %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
+
+define fastcc i32 @CountTree(%struct.quad_struct* %tree) {
+entry:
+; CHECK-LABEL: CountTree:
+; CHECK: bne
+; CHECK: cmp
+; CHECK: it eq
+; CHECK: cmpeq
+; CHECK: bne
+; CHECK: mov
+; CHECK: pop
+ br label %tailrecurse
+
+tailrecurse: ; preds = %bb, %entry
+ %tmp6 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp9 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=2]
+ %tmp12 = load %struct.quad_struct** null ; <%struct.quad_struct*> [#uses=1]
+ %tmp14 = icmp eq %struct.quad_struct* null, null ; <i1> [#uses=1]
+ %tmp17 = icmp eq %struct.quad_struct* %tmp6, null ; <i1> [#uses=1]
+ %tmp23 = icmp eq %struct.quad_struct* %tmp9, null ; <i1> [#uses=1]
+ %tmp29 = icmp eq %struct.quad_struct* %tmp12, null ; <i1> [#uses=1]
+ %bothcond = and i1 %tmp17, %tmp14 ; <i1> [#uses=1]
+ %bothcond1 = and i1 %bothcond, %tmp23 ; <i1> [#uses=1]
+ %bothcond2 = and i1 %bothcond1, %tmp29 ; <i1> [#uses=1]
+ br i1 %bothcond2, label %return, label %bb
+
+bb: ; preds = %tailrecurse
+ %tmp41 = tail call fastcc i32 @CountTree( %struct.quad_struct* %tmp9 ) ; <i32> [#uses=0]
+ br label %tailrecurse
+
+return: ; preds = %tailrecurse
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Thumb2/v8_IT_3.ll b/test/CodeGen/Thumb2/v8_IT_3.ll
new file mode 100644
index 0000000..4d90891
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_3.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 -relocation-model=pic | FileCheck %s --check-prefix=CHECK-PIC
+
+%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
+%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }
+
+@FuncPtr = external hidden unnamed_addr global %struct.FF*
+@.str1 = external hidden unnamed_addr constant [6 x i8], align 4
+@G = external unnamed_addr global i32
+@.str2 = external hidden unnamed_addr constant [58 x i8], align 4
+@.str3 = external hidden unnamed_addr constant [58 x i8], align 4
+
+define i32 @test() nounwind optsize ssp {
+entry:
+; CHECK-LABEL: test:
+; CHECK: push
+; CHECK-NOT: push
+ %block_size = alloca i32, align 4
+ %block_count = alloca i32, align 4
+ %index_cache = alloca i32, align 4
+ store i32 0, i32* %index_cache, align 4
+ %tmp = load i32* @G, align 4
+ %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
+ switch i32 %tmp1, label %bb8 [
+ i32 0, label %bb
+ i32 536870913, label %bb4
+ i32 536870914, label %bb6
+ ]
+
+bb:
+ %tmp2 = load i32* @G, align 4
+ %tmp4 = icmp eq i32 %tmp2, 0
+ br i1 %tmp4, label %bb1, label %bb8
+
+bb1:
+; CHECK: %bb6
+; CHECK: it eq
+; CHECK-NEXT: ldreq
+; CHECK-NEXT: it eq
+; CHECK-NEXT: cmpeq
+; CHECK: %bb1
+ %tmp5 = load i32* %block_size, align 4
+ %tmp6 = load i32* %block_count, align 4
+ %tmp7 = call %struct.FF* @Get() nounwind
+ store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
+ %tmp10 = zext i32 %tmp6 to i64
+ %tmp11 = zext i32 %tmp5 to i64
+ %tmp12 = mul nsw i64 %tmp10, %tmp11
+ %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
+ br label %bb8
+
+bb4:
+; CHECK-PIC: cmp
+; CHECK-PIC: cmp
+; CHECK-PIC-NEXT: bne
+; CHECK-PIC-NEXT: %bb4
+; CHECK-PIC-NEXT: movs
+; CHECK-PIC-NEXT: add
+; CHECK-PIC-NEXT: pop
+ ret i32 0
+
+bb6:
+ ret i32 1
+
+bb8:
+ ret i32 -1
+}
+
+declare i32 @printf(i8*, ...)
+
+declare %struct.FF* @Get()
+
+declare i32 @foo(i8*, i64, i32)
+
+declare i32 @bar(i32, i32, i32)
diff --git a/test/CodeGen/Thumb2/v8_IT_4.ll b/test/CodeGen/Thumb2/v8_IT_4.ll
new file mode 100644
index 0000000..45c79f4
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_4.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=thumbv8-eabi -float-abi=hard | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8-eabi -float-abi=hard -regalloc=basic | FileCheck %s
+
+%"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* }
+%"struct.__gnu_cxx::new_allocator<char>" = type <{ i8 }>
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" }
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" }
+%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 }
+
+
+define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
+; CHECK-LABEL: _ZNKSs7compareERKSs:
+; CHECK: cbnz r0,
+; CHECK-NEXT: %bb
+; CHECK-NEXT: sub{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
+; CHECK-NEXT: %bb1
+; CHECK-NEXT: pop.w
+entry:
+ %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
+ %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
+ %2 = icmp ult i32 %1, %0 ; <i1> [#uses=1]
+ %3 = select i1 %2, i32 %1, i32 %0 ; <i32> [#uses=1]
+ %4 = tail call arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i8*> [#uses=1]
+ %5 = tail call arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i8*> [#uses=1]
+ %6 = tail call arm_aapcs_vfpcc i32 @memcmp(i8* %4, i8* %5, i32 %3) nounwind readonly ; <i32> [#uses=2]
+ %7 = icmp eq i32 %6, 0 ; <i1> [#uses=1]
+ br i1 %7, label %bb, label %bb1
+
+bb: ; preds = %entry
+ %8 = sub i32 %0, %1 ; <i32> [#uses=1]
+ ret i32 %8
+
+bb1: ; preds = %entry
+ ret i32 %6
+}
+
+declare arm_aapcs_vfpcc i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly
+
+declare arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this)
+
+declare arm_aapcs_vfpcc i8* @_ZNKSs7_M_dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this)
+
+declare arm_aapcs_vfpcc i8* @_ZNKSs4dataEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this)
diff --git a/test/CodeGen/Thumb2/v8_IT_5.ll b/test/CodeGen/Thumb2/v8_IT_5.ll
new file mode 100644
index 0000000..3866068
--- /dev/null
+++ b/test/CodeGen/Thumb2/v8_IT_5.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=thumbv8 | FileCheck %s
+; CHECK: it ne
+; CHECK-NEXT: cmpne
+; CHECK-NEXT: beq
+; CHECK: cmp
+; CHECK-NEXT: beq
+; CHECK-NEXT: %if.else163
+; CHECK-NEXT: mov.w
+; CHECK-NEXT: b
+; CHECK-NEXT: %if.else145
+; CHECK-NEXT: mov.w
+
+%struct.hc = type { i32, i32, i32, i32 }
+
+define i32 @t(i32 %type) optsize {
+entry:
+ br i1 undef, label %if.then, label %if.else
+
+if.then:
+ unreachable
+
+if.else:
+ br i1 undef, label %if.then15, label %if.else18
+
+if.then15:
+ unreachable
+
+if.else18:
+ switch i32 %type, label %if.else173 [
+ i32 3, label %if.then115
+ i32 1, label %if.then102
+ ]
+
+if.then102:
+ br i1 undef, label %cond.true10.i, label %t.exit
+
+cond.true10.i:
+ br label %t.exit
+
+t.exit:
+ unreachable
+
+if.then115:
+ br i1 undef, label %if.else163, label %if.else145
+
+if.else145:
+ %call150 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34865152) optsize
+ br label %while.body172
+
+if.else163:
+ %call168 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34078720) optsize
+ br label %while.body172
+
+while.body172:
+ br label %while.body172
+
+if.else173:
+ ret i32 -1
+}
+
+declare hidden fastcc %struct.hc* @foo(%struct.hc* nocapture, i32) nounwind optsize
+