Diffstat (limited to 'test/CodeGen/ARM')
-rw-r--r--  test/CodeGen/ARM/2007-03-13-InstrSched.ll | 2
-rw-r--r--  test/CodeGen/ARM/2007-04-03-PEIBug.ll | 2
-rw-r--r--  test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll | 2
-rw-r--r--  test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll | 2
-rw-r--r--  test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll | 2
-rw-r--r--  test/CodeGen/ARM/2009-04-06-AsmModifier.ll | 2
-rw-r--r--  test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll | 2
-rw-r--r--  test/CodeGen/ARM/2011-12-14-machine-sink.ll | 2
-rw-r--r--  test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll | 8
-rw-r--r--  test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll | 71
-rw-r--r--  test/CodeGen/ARM/2012-05-29-TailDupBug.ll | 140
-rw-r--r--  test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll | 41
-rw-r--r--  test/CodeGen/ARM/2012-08-09-neon-extload.ll | 102
-rw-r--r--  test/CodeGen/ARM/addrmode.ll | 2
-rw-r--r--  test/CodeGen/ARM/aliases.ll | 2
-rw-r--r--  test/CodeGen/ARM/bicZext.ll | 19
-rw-r--r--  test/CodeGen/ARM/call_nolink.ll | 2
-rw-r--r--  test/CodeGen/ARM/cmn.ll | 22
-rw-r--r--  test/CodeGen/ARM/coalesce-subregs.ll | 68
-rw-r--r--  test/CodeGen/ARM/cse-libcalls.ll | 2
-rw-r--r--  test/CodeGen/ARM/data-in-code-annotations.ll | 42
-rw-r--r--  test/CodeGen/ARM/divmod.ll | 16
-rw-r--r--  test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll | 17
-rw-r--r--  test/CodeGen/ARM/fast-isel-call.ll | 95
-rw-r--r--  test/CodeGen/ARM/fast-isel-frameaddr.ll | 100
-rw-r--r--  test/CodeGen/ARM/fast-isel-intrinsic.ll | 32
-rw-r--r--  test/CodeGen/ARM/fast-isel.ll | 12
-rw-r--r--  test/CodeGen/ARM/fmuls.ll | 9
-rw-r--r--  test/CodeGen/ARM/fusedMAC.ll | 47
-rw-r--r--  test/CodeGen/ARM/iabs.ll | 20
-rw-r--r--  test/CodeGen/ARM/ldrd.ll | 5
-rw-r--r--  test/CodeGen/ARM/lsr-scale-addr-mode.ll | 2
-rw-r--r--  test/CodeGen/ARM/movt-movw-global.ll | 8
-rw-r--r--  test/CodeGen/ARM/neon_div.ll | 6
-rw-r--r--  test/CodeGen/ARM/opt-shuff-tstore.ll | 2
-rw-r--r--  test/CodeGen/ARM/pr13249.ll | 27
-rw-r--r--  test/CodeGen/ARM/select.ll | 26
-rw-r--r--  test/CodeGen/ARM/smml.ll | 13
-rw-r--r--  test/CodeGen/ARM/str_pre-2.ll | 9
-rw-r--r--  test/CodeGen/ARM/str_pre.ll | 2
-rw-r--r--  test/CodeGen/ARM/struct_byval.ll | 46
-rw-r--r--  test/CodeGen/ARM/sub-cmp-peephole.ll | 65
-rw-r--r--  test/CodeGen/ARM/sub.ll | 12
-rw-r--r--  test/CodeGen/ARM/thread_pointer.ll | 2
-rw-r--r--  test/CodeGen/ARM/thumb2-it-block.ll | 4
-rw-r--r--  test/CodeGen/ARM/tls-models.ll | 117
-rw-r--r--  test/CodeGen/ARM/tls1.ll | 6
-rw-r--r--  test/CodeGen/ARM/tls3.ll | 2
-rw-r--r--  test/CodeGen/ARM/twoaddrinstr.ll | 21
-rw-r--r--  test/CodeGen/ARM/unsafe-fsub.ll | 18
-rw-r--r--  test/CodeGen/ARM/vcnt.ll | 49
-rw-r--r--  test/CodeGen/ARM/vector-extend-narrow.ll | 8
-rw-r--r--  test/CodeGen/ARM/vlddup.ll | 32
-rw-r--r--  test/CodeGen/ARM/vmul.ll | 74
-rw-r--r--  test/CodeGen/ARM/vst3.ll | 2
55 files changed, 1358 insertions(+), 85 deletions(-)
diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index 33f935e..a63cdd4 100644
--- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
; RUN: -mattr=+v6 | grep r9
; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \
-; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats |& grep asm-printer
+; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer
; | grep 35
define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) {
diff --git a/test/CodeGen/ARM/2007-04-03-PEIBug.ll b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
index b543c57..8d3337c 100644
--- a/test/CodeGen/ARM/2007-04-03-PEIBug.ll
+++ b/test/CodeGen/ARM/2007-04-03-PEIBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep {add.*#0}
+; RUN: llc < %s -march=arm | not grep "add.*#0"
define i32 @foo() {
entry:
diff --git a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
index d2eb85d..670048b 100644
--- a/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
+++ b/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep {str.*\\!}
+; RUN: llc < %s -march=arm | not grep "str.*\!"
%struct.shape_edge_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32 }
%struct.shape_path_t = type { %struct.shape_edge_t*, %struct.shape_edge_t*, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index fd2f462..3754db0 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast -optimize-regalloc=0
; PR1925
%struct.encode_aux_nearestmatch = type { i32*, i32*, i32*, i32*, i32, i32 }
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index 44da8e7..5fbed0d 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast -optimize-regalloc=0
; PR1925
%"struct.kc::impl_Ccode_option" = type { %"struct.kc::impl_abstract_phylum" }
diff --git a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index 3526722..7342f69 100644
--- a/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep {swi 107}
+; RUN: llc < %s -march=arm | grep "swi 107"
define i32 @_swilseek(i32) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
index 813bf3c..7d4cc6e 100644
--- a/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
+++ b/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -regalloc=fast -verify-machineinstrs
+; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -verify-machineinstrs
target triple = "arm-pc-linux-gnu"
; This test case would accidentally use the same physreg for two virtregs
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
index 5ce600d..b21bb00 100644
--- a/test/CodeGen/ARM/2011-12-14-machine-sink.ll
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
; Radar 10266272
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios4.0.0"
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
index 872eca3..f1c85f1 100644
--- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -60,8 +60,16 @@ for.end: ; preds = %entry
ret void
}
+; Check that pseudo-expansion preserves <undef> flags.
+define void @foo3(i8* %p) nounwind ssp {
+entry:
+ tail call void @llvm.arm.neon.vst2.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4)
+ ret void
+}
+
declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
!0 = metadata !{metadata !"omnipotent char", metadata !1}
!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
new file mode 100644
index 0000000..b3a7e34
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
@@ -0,0 +1,71 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 < %s
+
+; CodeGen SplitCriticalEdge() shouldn't try to break edge to a landing pad.
+; rdar://11300144
+
+%0 = type opaque
+%class.FunctionInterpreter.3.15.31 = type { %class.Parser.1.13.29, %class.Parser.1.13.29*, %struct.ParserVariable.2.14.30*, i32 }
+%class.Parser.1.13.29 = type { i32 (...)**, %class.Parser.1.13.29* }
+%struct.ParserVariable.2.14.30 = type opaque
+%struct.ParseErrorMsg.0.12.28 = type { i32, i32, i32 }
+
+@_ZTI13ParseErrorMsg = external hidden unnamed_addr constant { i8*, i8* }
+@"OBJC_IVAR_$_MUMathExpressionDoubleBased.mInterpreter" = external hidden global i32, section "__DATA, __objc_ivar", align 4
+@"\01L_OBJC_SELECTOR_REFERENCES_14" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare void @__cxa_end_catch()
+
+declare void @_ZSt9terminatev()
+
+define hidden double @t(%0* %self, i8* nocapture %_cmd) optsize ssp {
+entry:
+ %call = invoke double undef(%class.FunctionInterpreter.3.15.31* undef) optsize
+ to label %try.cont unwind label %lpad
+
+lpad: ; preds = %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* bitcast ({ i8*, i8* }* @_ZTI13ParseErrorMsg to i8*)
+ br i1 undef, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %struct.ParseErrorMsg.0.12.28*)*)(i8* undef, i8* undef, %struct.ParseErrorMsg.0.12.28* undef) optsize
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2: ; preds = %catch
+ br label %try.cont
+
+try.cont: ; preds = %invoke.cont2, %entry
+ %value.0 = phi double [ 0x7FF8000000000000, %invoke.cont2 ], [ %call, %entry ]
+ ret double %value.0
+
+lpad1: ; preds = %catch
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %eh.resume unwind label %terminate.lpad
+
+eh.resume: ; preds = %lpad1, %lpad
+ resume { i8*, i32 } undef
+
+terminate.lpad: ; preds = %lpad1
+ %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ unreachable
+}
+
+declare i32 @__gxx_personality_sj0(...)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/ARM/2012-05-29-TailDupBug.ll b/test/CodeGen/ARM/2012-05-29-TailDupBug.ll
new file mode 100644
index 0000000..1a57f04
--- /dev/null
+++ b/test/CodeGen/ARM/2012-05-29-TailDupBug.ll
@@ -0,0 +1,140 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -verify-machineinstrs < %s
+
+; Teach taildup to update livein set to appease verifier.
+; rdar://11538365
+
+%struct.__CFString.2 = type opaque
+
+declare void @CFRelease(i8*)
+
+define hidden fastcc i32 @t() ssp {
+entry:
+ %mylocale.i.i = alloca [256 x i8], align 1
+ br i1 undef, label %return, label %CFStringIsHyphenationAvailableForLocale.exit
+
+CFStringIsHyphenationAvailableForLocale.exit: ; preds = %entry
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %CFStringIsHyphenationAvailableForLocale.exit
+ br i1 undef, label %if.end8.thread.i, label %if.then.i
+
+if.then.i: ; preds = %if.end
+ br i1 undef, label %if.end8.thread.i, label %if.end8.i
+
+if.end8.thread.i: ; preds = %if.then.i, %if.end
+ unreachable
+
+if.end8.i: ; preds = %if.then.i
+ br i1 undef, label %if.then11.i, label %__CFHyphenationPullTokenizer.exit
+
+if.then11.i: ; preds = %if.end8.i
+ unreachable
+
+__CFHyphenationPullTokenizer.exit: ; preds = %if.end8.i
+ br i1 undef, label %if.end68, label %if.then3
+
+if.then3: ; preds = %__CFHyphenationPullTokenizer.exit
+ br i1 undef, label %cond.end, label %cond.false
+
+cond.false: ; preds = %if.then3
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %if.then3
+ br i1 undef, label %while.end, label %while.body
+
+while.body: ; preds = %cond.end
+ unreachable
+
+while.end: ; preds = %cond.end
+ br i1 undef, label %if.end5.i, label %if.then.i16
+
+if.then.i16: ; preds = %while.end
+ br i1 undef, label %if.then4.i, label %if.end5.i
+
+if.then4.i: ; preds = %if.then.i16
+ br i1 false, label %cleanup.thread, label %if.end.i20
+
+if.end5.i: ; preds = %if.then.i16, %while.end
+ unreachable
+
+if.end.i20: ; preds = %if.then4.i
+ br label %for.body.i146.i
+
+for.body.i146.i: ; preds = %for.body.i146.i, %if.end.i20
+ br i1 undef, label %if.end20.i, label %for.body.i146.i
+
+if.end20.i: ; preds = %for.body.i146.i
+ br i1 undef, label %cleanup.thread, label %if.end23.i
+
+if.end23.i: ; preds = %if.end20.i
+ br label %for.body.i94.i
+
+for.body.i94.i: ; preds = %for.body.i94.i, %if.end23.i
+ br i1 undef, label %if.then28.i, label %for.body.i94.i
+
+if.then28.i: ; preds = %for.body.i94.i
+ br i1 undef, label %cond.true.i26, label %land.lhs.true
+
+cond.true.i26: ; preds = %if.then28.i
+ br label %land.lhs.true
+
+land.lhs.true: ; preds = %cond.true.i26, %if.then28.i
+ br i1 false, label %cleanup.thread, label %if.end35
+
+if.end35: ; preds = %land.lhs.true
+ br i1 undef, label %cleanup.thread, label %if.end45
+
+if.end45: ; preds = %if.end35
+ br i1 undef, label %if.then50, label %if.end.i37
+
+if.end.i37: ; preds = %if.end45
+ br label %if.then50
+
+if.then50: ; preds = %if.end.i37, %if.end45
+ br i1 undef, label %__CFHyphenationGetHyphensForString.exit, label %if.end.i
+
+if.end.i: ; preds = %if.then50
+ br i1 undef, label %cleanup.i, label %cond.true.i
+
+cond.true.i: ; preds = %if.end.i
+ br i1 undef, label %for.cond16.preheader.i, label %for.cond57.preheader.i
+
+for.cond16.preheader.i: ; preds = %cond.true.i
+ %cmp1791.i = icmp sgt i32 undef, 1
+ br i1 %cmp1791.i, label %for.body18.i, label %for.cond57.preheader.i
+
+for.cond57.preheader.i: ; preds = %for.cond16.preheader.i, %cond.true.i
+ %sub69.i = add i32 undef, -2
+ br label %cleanup.i
+
+for.body18.i: ; preds = %for.cond16.preheader.i
+ store i16 0, i16* undef, align 2
+ br label %while.body.i
+
+while.body.i: ; preds = %while.body.i, %for.body18.i
+ br label %while.body.i
+
+cleanup.i: ; preds = %for.cond57.preheader.i, %if.end.i
+ br label %__CFHyphenationGetHyphensForString.exit
+
+__CFHyphenationGetHyphensForString.exit: ; preds = %cleanup.i, %if.then50
+ %retval.1.i = phi i32 [ 0, %cleanup.i ], [ -1, %if.then50 ]
+ %phitmp = bitcast %struct.__CFString.2* null to i8*
+ br label %if.end68
+
+cleanup.thread: ; preds = %if.end35, %land.lhs.true, %if.end20.i, %if.then4.i
+ call void @llvm.stackrestore(i8* null)
+ br label %return
+
+if.end68: ; preds = %__CFHyphenationGetHyphensForString.exit, %__CFHyphenationPullTokenizer.exit
+ %hyphenCount.2 = phi i32 [ %retval.1.i, %__CFHyphenationGetHyphensForString.exit ], [ 0, %__CFHyphenationPullTokenizer.exit ]
+ %_token.1 = phi i8* [ %phitmp, %__CFHyphenationGetHyphensForString.exit ], [ undef, %__CFHyphenationPullTokenizer.exit ]
+ call void @CFRelease(i8* %_token.1)
+ br label %return
+
+return: ; preds = %if.end68, %cleanup.thread, %CFStringIsHyphenationAvailableForLocale.exit, %entry
+ %retval.1 = phi i32 [ %hyphenCount.2, %if.end68 ], [ -1, %CFStringIsHyphenationAvailableForLocale.exit ], [ -1, %cleanup.thread ], [ -1, %entry ]
+ ret i32 %retval.1
+}
+
+declare void @llvm.stackrestore(i8*) nounwind
diff --git a/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
new file mode 100644
index 0000000..b05ec63
--- /dev/null
+++ b/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -o /dev/null "-mtriple=thumbv7-apple-ios" -debug-only=post-RA-sched 2> %t
+; RUN: FileCheck %s < %t
+; REQUIRES: asserts
+; Make sure that mayalias store-load dependencies have one cycle
+; latency regardless of whether they are barriers or not.
+
+; CHECK: ** List Scheduling
+; CHECK: SU(2){{.*}}STR{{.*}}Volatile
+; CHECK-NOT: ch SU
+; CHECK: ch SU(3): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: SU(3){{.*}}LDR{{.*}}Volatile
+; CHECK-NOT: ch SU
+; CHECK: ch SU(2): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: ** List Scheduling
+; CHECK: SU(2){{.*}}STR{{.*}}
+; CHECK-NOT: ch SU
+; CHECK: ch SU(3): Latency=1
+; CHECK-NOT: ch SU
+; CHECK: SU(3){{.*}}LDR{{.*}}
+; CHECK-NOT: ch SU
+; CHECK: ch SU(2): Latency=1
+; CHECK-NOT: ch SU
+define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
+entry:
+ store volatile i32 65540, i32* %p1, align 4, !tbaa !0
+ %0 = load volatile i32* %p2, align 4, !tbaa !0
+ ret i32 %0
+}
+
+define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind {
+entry:
+ store i32 65540, i32* %p1, align 4, !tbaa !0
+ %0 = load i32* %p2, align 4, !tbaa !0
+ ret i32 %0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
new file mode 100644
index 0000000..b55f1ca
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
+
+@var_v2i8 = global <2 x i8> zeroinitializer
+@var_v4i8 = global <4 x i8> zeroinitializer
+
+@var_v2i16 = global <2 x i16> zeroinitializer
+@var_v4i16 = global <4 x i16> zeroinitializer
+
+@var_v2i32 = global <2 x i32> zeroinitializer
+@var_v4i32 = global <4 x i32> zeroinitializer
+
+@var_v2i64 = global <2 x i64> zeroinitializer
+
+define void @test_v2i8tov2i32() {
+; CHECK: test_v2i8tov2i32:
+
+ %i8val = load <2 x i8>* @var_v2i8
+
+ %i32val = sext <2 x i8> %i8val to <2 x i32>
+ store <2 x i32> %i32val, <2 x i32>* @var_v2i32
+; CHECK: vld1.16 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :16]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
+
+define void @test_v2i8tov2i64() {
+; CHECK: test_v2i8tov2i64:
+
+ %i8val = load <2 x i8>* @var_v2i8
+
+ %i64val = sext <2 x i8> %i8val to <2 x i64>
+ store <2 x i64> %i64val, <2 x i64>* @var_v2i64
+; CHECK: vld1.16 {d{{[0-9]+}}[0]}, [{{r[0-9]+}}, :16]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.s32 {{q[0-9]+}}, {{d[0-9]+}}
+
+; %i64val = sext <2 x i8> %i8val to <2 x i64>
+; store <2 x i64> %i64val, <2 x i64>* @var_v2i64
+
+ ret void
+}
+
+define void @test_v4i8tov4i16() {
+; CHECK: test_v4i8tov4i16:
+
+ %i8val = load <4 x i8>* @var_v4i8
+
+ %i16val = sext <4 x i8> %i8val to <4 x i16>
+ store <4 x i16> %i16val, <4 x i16>* @var_v4i16
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK-NOT: vmovl.s16
+
+ ret void
+; CHECK: bx lr
+}
+
+define void @test_v4i8tov4i32() {
+; CHECK: test_v4i8tov4i32:
+
+ %i8val = load <4 x i8>* @var_v4i8
+
+ %i16val = sext <4 x i8> %i8val to <4 x i32>
+ store <4 x i32> %i16val, <4 x i32>* @var_v4i32
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s8 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s16 {{q[0-9]+}}, {{d[0-9]+}}
+
+ ret void
+}
+
+define void @test_v2i16tov2i32() {
+; CHECK: test_v2i16tov2i32:
+
+ %i16val = load <2 x i16>* @var_v2i16
+
+ %i32val = sext <2 x i16> %i16val to <2 x i32>
+ store <2 x i32> %i32val, <2 x i32>* @var_v2i32
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
+; CHECK-NOT: vmovl
+
+ ret void
+; CHECK: bx lr
+}
+
+define void @test_v2i16tov2i64() {
+; CHECK: test_v2i16tov2i64:
+
+ %i16val = load <2 x i16>* @var_v2i16
+
+ %i64val = sext <2 x i16> %i16val to <2 x i64>
+ store <2 x i64> %i64val, <2 x i64>* @var_v2i64
+; CHECK: vld1.32 {d[[LOAD:[0-9]+]][0]}, [{{r[0-9]+}}, :32]
+; CHECK: vmovl.s16 {{q[0-9]+}}, d[[LOAD]]
+; CHECK: vmovl.s32 {{q[0-9]+}}, d[[LOAD]]
+
+ ret void
+}
diff --git a/test/CodeGen/ARM/addrmode.ll b/test/CodeGen/ARM/addrmode.ll
index 9ccff07..6da9089 100644
--- a/test/CodeGen/ARM/addrmode.ll
+++ b/test/CodeGen/ARM/addrmode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -stats |& grep asm-printer | grep 4
+; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4
define i32 @t1(i32 %a) {
%b = mul i32 %a, 9
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index 31c5007..d668334 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=arm-linux-gnueabi -o %t
-; RUN: grep { = } %t | count 5
+; RUN: grep " = " %t | count 5
; RUN: grep globl %t | count 4
; RUN: grep weak %t | count 1
diff --git a/test/CodeGen/ARM/bicZext.ll b/test/CodeGen/ARM/bicZext.ll
new file mode 100644
index 0000000..cf4b7ba
--- /dev/null
+++ b/test/CodeGen/ARM/bicZext.ll
@@ -0,0 +1,19 @@
+; RUN: llc %s -o - | FileCheck %s
+; ModuleID = 'bic.c'
+target triple = "thumbv7-apple-ios3.0.0"
+
+define zeroext i16 @foo16(i16 zeroext %f) nounwind readnone optsize ssp {
+entry:
+ ; CHECK: .thumb_func _foo16
+ ; CHECK: {{bic[^#]*#3}}
+ %and = and i16 %f, -4
+ ret i16 %and
+}
+
+define i32 @foo32(i32 %f) nounwind readnone optsize ssp {
+entry:
+ ; CHECK: .thumb_func _foo32
+ ; CHECK: {{bic[^#]*#3}}
+ %and = and i32 %f, -4
+ ret i32 %and
+}
diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll
index efe29d8..00b16888 100644
--- a/test/CodeGen/ARM/call_nolink.ll
+++ b/test/CodeGen/ARM/call_nolink.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: not grep {bx lr}
+; RUN: not grep "bx lr"
%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
@r = external global [14 x i32] ; <[14 x i32]*> [#uses=4]
diff --git a/test/CodeGen/ARM/cmn.ll b/test/CodeGen/ARM/cmn.ll
new file mode 100644
index 0000000..ef73165
--- /dev/null
+++ b/test/CodeGen/ARM/cmn.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple thumbv7-apple-ios | FileCheck %s
+; <rdar://problem/7569620>
+
+define i32 @compare_i_gt(i32 %a) {
+entry:
+; CHECK: compare_i_gt
+; CHECK-NOT: mvn
+; CHECK: cmn
+ %cmp = icmp sgt i32 %a, -78
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+}
+
+define i32 @compare_r_eq(i32 %a, i32 %b) {
+entry:
+; CHECK: compare_r_eq
+; CHECK: cmn
+ %sub = sub nsw i32 0, %b
+ %cmp = icmp eq i32 %a, %sub
+ %. = zext i1 %cmp to i32
+ ret i32 %.
+}
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
new file mode 100644
index 0000000..fb0f4c6
--- /dev/null
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mcpu=cortex-a9 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; CHECK: f
+; The vld2 and vst2 are not aligned wrt each other; the second Q loaded is the
+; first one stored.
+; The coalescer must find a super-register larger than QQ to eliminate the copy
+; setting up the vst2 data.
+; CHECK: vld2
+; CHECK-NOT: vorr
+; CHECK-NOT: vmov
+; CHECK: vst2
+define void @f(float* %p, i32 %c) nounwind ssp {
+entry:
+ %0 = bitcast float* %p to i8*
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+ %add.ptr = getelementptr inbounds float* %p, i32 8
+ %1 = bitcast float* %add.ptr to i8*
+ tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> undef, i32 4)
+ ret void
+}
+
+; CHECK: f1
+; FIXME: This function still has copies.
+define void @f1(float* %p, i32 %c) nounwind ssp {
+entry:
+ %0 = bitcast float* %p to i8*
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld221 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+ %add.ptr = getelementptr inbounds float* %p, i32 8
+ %1 = bitcast float* %add.ptr to i8*
+ %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+ %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
+ tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %vld221, <4 x float> %vld2215, i32 4)
+ ret void
+}
+
+; CHECK: f2
+; FIXME: This function still has copies.
+define void @f2(float* %p, i32 %c) nounwind ssp {
+entry:
+ %0 = bitcast float* %p to i8*
+ %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %0, i32 4)
+ %vld224 = extractvalue { <4 x float>, <4 x float> } %vld2, 1
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %qq0.0.1.0 = phi <4 x float> [ %vld224, %entry ], [ %vld2216, %do.body ]
+ %c.addr.0 = phi i32 [ %c, %entry ], [ %dec, %do.body ]
+ %p.addr.0 = phi float* [ %p, %entry ], [ %add.ptr, %do.body ]
+ %add.ptr = getelementptr inbounds float* %p.addr.0, i32 8
+ %1 = bitcast float* %add.ptr to i8*
+ %vld22 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4)
+ %vld2215 = extractvalue { <4 x float>, <4 x float> } %vld22, 0
+ %vld2216 = extractvalue { <4 x float>, <4 x float> } %vld22, 1
+ tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %qq0.0.1.0, <4 x float> %vld2215, i32 4)
+ %dec = add nsw i32 %c.addr.0, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %do.end, label %do.body
+
+do.end: ; preds = %do.body
+ ret void
+}
+
+declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 1d011be..62b9e43 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep {bl.\*__ltdf} | count 1
+; RUN: llc < %s -march=arm | grep "bl.*__ltdf" | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
diff --git a/test/CodeGen/ARM/data-in-code-annotations.ll b/test/CodeGen/ARM/data-in-code-annotations.ll
new file mode 100644
index 0000000..a66a9d1
--- /dev/null
+++ b/test/CodeGen/ARM/data-in-code-annotations.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+
+define double @f1() nounwind {
+; CHECK: f1:
+; CHECK: .data_region
+; CHECK: .long 1413754129
+; CHECK: .long 1074340347
+; CHECK: .end_data_region
+ ret double 0x400921FB54442D11
+}
+
+
+define i32 @f2() {
+; CHECK: f2:
+; CHECK: .data_region jt32
+; CHECK: .end_data_region
+
+entry:
+ switch i32 undef, label %return [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb6
+ i32 3, label %sw.bb13
+ i32 4, label %sw.bb20
+ ]
+
+sw.bb: ; preds = %entry
+ br label %return
+
+sw.bb6: ; preds = %entry
+ br label %return
+
+sw.bb13: ; preds = %entry
+ br label %return
+
+sw.bb20: ; preds = %entry
+ %div = sdiv i32 undef, undef
+ br label %return
+
+return: ; preds = %sw.bb20, %sw.bb13, %sw.bb6, %sw.bb, %entry
+ %retval.0 = phi i32 [ %div, %sw.bb20 ], [ undef, %sw.bb13 ], [ undef, %sw.bb6 ], [ undef, %sw.bb ], [ 0, %entry ]
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
index 49c4103..7fbf8f4 100644
--- a/test/CodeGen/ARM/divmod.ll
+++ b/test/CodeGen/ARM/divmod.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-ios5.0 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s
define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
entry:
@@ -56,3 +56,17 @@ bb1:
declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone
declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
+
+; rdar://11714607
+define i32 @howmany(i32 %x, i32 %y) nounwind {
+entry:
+; CHECK: howmany:
+; CHECK: bl ___udivmodsi4
+; CHECK-NOT: ___udivsi3
+ %rem = urem i32 %x, %y
+ %div = udiv i32 %x, %y
+ %not.cmp = icmp ne i32 %rem, 0
+ %add = zext i1 %not.cmp to i32
+ %cond = add i32 %add, %div
+ ret i32 %cond
+}
diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
new file mode 100644
index 0000000..14721a4
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Fast-isel can't handle non-double multi-reg retvals.
+; This test just check to make sure we don't hit the assert in FinishCall.
+define <16 x i8> @foo() nounwind ssp {
+entry:
+ ret <16 x i8> zeroinitializer
+}
+
+define void @t1() nounwind ssp {
+entry:
+; ARM: @t1
+; THUMB: @t1
+ %call = call <16 x i8> @foo()
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index dd460b2..edc805a 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
define i32 @t0(i1 zeroext %a) nounwind {
%1 = zext i1 %a to i32
@@ -99,6 +101,11 @@ entry:
; ARM: uxtb r9, r12
; ARM: str r9, [sp, #4]
; ARM: bl _bar
+; ARM-LONG: @t10
+; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
+; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
+; ARM-LONG: ldr lr, [lr]
+; ARM-LONG: blx lr
; THUMB: @t10
; THUMB: movs r0, #0
; THUMB: movt r0, #0
@@ -121,8 +128,96 @@ entry:
; THUMB: uxtb.w r9, r12
; THUMB: str.w r9, [sp, #4]
; THUMB: bl _bar
+; THUMB-LONG: @t10
+; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr
+; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr
+; THUMB-LONG: ldr.w lr, [lr]
+; THUMB-LONG: blx lr
%call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
ret i32 0
}
declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
+
+define i32 @bar0(i32 %i) nounwind {
+ ret i32 0
+}
+
+define void @foo3() uwtable {
+; ARM: movw r0, #0
+; ARM: movw r1, :lower16:_bar0
+; ARM: movt r1, :upper16:_bar0
+; ARM: blx r1
+; THUMB: movs r0, #0
+; THUMB: movw r1, :lower16:_bar0
+; THUMB: movt r1, :upper16:_bar0
+; THUMB: blx r1
+ %fptr = alloca i32 (i32)*, align 8
+ store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
+ %1 = load i32 (i32)** %fptr, align 8
+ %call = call i32 %1(i32 0)
+ ret void
+}
+
+define i32 @LibCall(i32 %a, i32 %b) {
+entry:
+; ARM: LibCall
+; ARM: bl ___udivsi3
+; ARM-LONG: LibCall
+; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
+; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; ARM-LONG: ldr r2, [r2]
+; ARM-LONG: blx r2
+; THUMB: LibCall
+; THUMB: bl ___udivsi3
+; THUMB-LONG: LibCall
+; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr
+; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr
+; THUMB-LONG: ldr r2, [r2]
+; THUMB-LONG: blx r2
+ %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
+ ret i32 %tmp1
+}
+
+define i32 @VarArg() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %j = alloca i32, align 4
+ %k = alloca i32, align 4
+ %m = alloca i32, align 4
+ %n = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %0 = load i32* %i, align 4
+ %1 = load i32* %j, align 4
+ %2 = load i32* %k, align 4
+ %3 = load i32* %m, align 4
+ %4 = load i32* %n, align 4
+; ARM: VarArg
+; ARM: mov r7, sp
+; ARM: movw r0, #5
+; ARM: ldr r1, [r7, #-4]
+; ARM: ldr r2, [r7, #-8]
+; ARM: ldr r3, [r7, #-12]
+; ARM: ldr r9, [sp, #16]
+; ARM: ldr r12, [sp, #12]
+; ARM: str r9, [sp]
+; ARM: str r12, [sp, #4]
+; ARM: bl _CallVariadic
+; THUMB: mov r7, sp
+; THUMB: movs r0, #5
+; THUMB: movt r0, #0
+; THUMB: ldr r1, [sp, #28]
+; THUMB: ldr r2, [sp, #24]
+; THUMB: ldr r3, [sp, #20]
+; THUMB: ldr.w r9, [sp, #16]
+; THUMB: ldr.w r12, [sp, #12]
+; THUMB: str.w r9, [sp]
+; THUMB: str.w r12, [sp, #4]
+; THUMB: bl _CallVariadic
+ %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
+ store i32 %call, i32* %tmp, align 4
+ %5 = load i32* %tmp, align 4
+ ret i32 %5
+}
+
+declare i32 @CallVariadic(i32, ...)
diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll
new file mode 100644
index 0000000..8f7b294
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll
@@ -0,0 +1,100 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-ARM
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-ARM
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=DARWIN-THUMB2
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=LINUX-THUMB2
+
+define i8* @frameaddr_index0() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index0:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+
+; DARWIN-THUMB2: frameaddr_index0:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+
+; LINUX-ARM: frameaddr_index0:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+
+; LINUX-THUMB2: frameaddr_index0:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+
+ %0 = call i8* @llvm.frameaddress(i32 0)
+ ret i8* %0
+}
+
+define i8* @frameaddr_index1() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index1:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+; DARWIN-ARM: ldr r0, [r0]
+
+; DARWIN-THUMB2: frameaddr_index1:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+; DARWIN-THUMB2: ldr r0, [r0]
+
+; LINUX-ARM: frameaddr_index1:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+; LINUX-ARM: ldr r0, [r0]
+
+; LINUX-THUMB2: frameaddr_index1:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+; LINUX-THUMB2: ldr r0, [r0]
+
+ %0 = call i8* @llvm.frameaddress(i32 1)
+ ret i8* %0
+}
+
+define i8* @frameaddr_index3() nounwind {
+entry:
+; DARWIN-ARM: frameaddr_index3:
+; DARWIN-ARM: push {r7}
+; DARWIN-ARM: mov r7, sp
+; DARWIN-ARM: mov r0, r7
+; DARWIN-ARM: ldr r0, [r0]
+; DARWIN-ARM: ldr r0, [r0]
+; DARWIN-ARM: ldr r0, [r0]
+
+; DARWIN-THUMB2: frameaddr_index3:
+; DARWIN-THUMB2: str r7, [sp, #-4]!
+; DARWIN-THUMB2: mov r7, sp
+; DARWIN-THUMB2: mov r0, r7
+; DARWIN-THUMB2: ldr r0, [r0]
+; DARWIN-THUMB2: ldr r0, [r0]
+; DARWIN-THUMB2: ldr r0, [r0]
+
+; LINUX-ARM: frameaddr_index3:
+; LINUX-ARM: push {r11}
+; LINUX-ARM: mov r11, sp
+; LINUX-ARM: mov r0, r11
+; LINUX-ARM: ldr r0, [r0]
+; LINUX-ARM: ldr r0, [r0]
+; LINUX-ARM: ldr r0, [r0]
+
+; LINUX-THUMB2: frameaddr_index3:
+; LINUX-THUMB2: str r7, [sp, #-4]!
+; LINUX-THUMB2: mov r7, sp
+; LINUX-THUMB2: mov r0, r7
+; LINUX-THUMB2: ldr r0, [r0]
+; LINUX-THUMB2: ldr r0, [r0]
+; LINUX-THUMB2: ldr r0, [r0]
+
+ %0 = call i8* @llvm.frameaddress(i32 3)
+ ret i8* %0
+}
+
+declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index e6bdfa7..b73fcef 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG
@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
@temp = common global [60 x i8] zeroinitializer, align 1
@@ -13,6 +15,11 @@ define void @t1() nounwind ssp {
; ARM: movw r2, #10
; ARM: uxtb r1, r1
; ARM: bl _memset
+; ARM-LONG: t1
+; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
; THUMB: t1
; THUMB: movw r0, :lower16:_message1
; THUMB: movt r0, :upper16:_message1
@@ -23,6 +30,11 @@ define void @t1() nounwind ssp {
; THUMB: movt r2, #0
; THUMB: uxtb r1, r1
; THUMB: bl _memset
+; THUMB-LONG: t1
+; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
ret void
}
@@ -41,6 +53,11 @@ define void @t2() nounwind ssp {
; ARM: mov r0, r1
; ARM: ldr r1, [sp] @ 4-byte Reload
; ARM: bl _memcpy
+; ARM-LONG: t2
+; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
; THUMB: t2
; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
@@ -51,6 +68,11 @@ define void @t2() nounwind ssp {
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memcpy
+; THUMB-LONG: t2
+; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
ret void
}
@@ -67,6 +89,11 @@ define void @t3() nounwind ssp {
; ARM: movw r2, #10
; ARM: mov r0, r1
; ARM: bl _memmove
+; ARM-LONG: t3
+; ARM-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
+; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
+; ARM-LONG: ldr r3, [r3]
+; ARM-LONG: blx r3
; THUMB: t3
; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
@@ -77,6 +104,11 @@ define void @t3() nounwind ssp {
; THUMB: movt r2, #0
; THUMB: mov r0, r1
; THUMB: bl _memmove
+; THUMB-LONG: t3
+; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
+; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
+; THUMB-LONG: ldr r3, [r3]
+; THUMB-LONG: blx r3
call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 417e2d9..ecd5fe2 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -226,3 +226,15 @@ define i32 @urem_fold(i32 %a) nounwind {
%rem = urem i32 %a, 32
ret i32 %rem
}
+
+define i32 @test7() noreturn nounwind {
+entry:
+; ARM: @test7
+; THUMB: @test7
+; ARM: trap
+; THUMB: trap
+ tail call void @llvm.trap( )
+ unreachable
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll
index bc118b8..3c3182b 100644
--- a/test/CodeGen/ARM/fmuls.ll
+++ b/test/CodeGen/ARM/fmuls.ll
@@ -21,3 +21,12 @@ entry:
; CORTEXA8: vmul.f32 d0, d1, d0
; CORTEXA9: test:
; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}}
+
+; VFP2: test2
+define float @test2(float %a) nounwind {
+; CHECK-NOT: mul
+; CHECK: mov pc, lr
+ %ret = fmul float %a, 1.0
+ ret float %ret
+}
+
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
index 802d1b8..303d165 100644
--- a/test/CodeGen/ARM/fusedMAC.ll
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
; Check generated fused MAC and MLS.
define double @fusedMACTest1(double %d1, double %d2, double %d3) {
@@ -138,8 +138,16 @@ entry:
; CHECK: vfms.f64
%tmp1 = fsub double -0.0, %b
%tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
- %tmp3 = fsub double -0.0, %tmp2
- ret double %tmp3
+ ret double %tmp2
+}
+
+define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp {
+; CHECK: test_fnms_f32
+; CHECK: vfnms.f32
+ %tmp1 = load float* %c, align 4
+ %tmp2 = fsub float -0.0, %tmp1
+ %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
+ ret float %tmp3
}
define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
@@ -158,7 +166,8 @@ entry:
; CHECK: vfnms.f64
%tmp1 = fsub double -0.0, %b
%tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
- ret double %tmp2
+ %tmp3 = fsub double -0.0, %tmp2
+ ret double %tmp3
}
define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
@@ -180,6 +189,36 @@ entry:
ret double %tmp3
}
+define float @test_fma_const_fold(float %a, float %b) nounwind {
+; CHECK: test_fma_const_fold
+; CHECK-NOT: vfma
+; CHECK-NOT: vmul
+; CHECK: vadd
+ %ret = call float @llvm.fma.f32(float %a, float 1.0, float %b)
+ ret float %ret
+}
+
+define float @test_fma_canonicalize(float %a, float %b) nounwind {
+; CHECK: test_fma_canonicalize
+; CHECK: vmov.f32 [[R1:s[0-9]+]], #2.000000e+00
+; CHECK: vfma.f32 {{s[0-9]+}}, {{s[0-9]+}}, [[R1]]
+ %ret = call float @llvm.fma.f32(float 2.0, float %a, float %b)
+ ret float %ret
+}
+
+; Check that very wide vector fma's can be split into legal fma's.
+define void @test_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>* %p) nounwind readnone ssp {
+; CHECK: test_fma_v8f32
+; CHECK: vfma.f32
+; CHECK: vfma.f32
+entry:
+ %call = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone
+ store <8 x float> %call, <8 x float>* %p, align 16
+ ret void
+}
+
+
declare float @llvm.fma.f32(float, float, float) nounwind readnone
declare double @llvm.fma.f64(double, double, double) nounwind readnone
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index 89e309d..600a8c2 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -10,7 +10,25 @@ define i32 @test(i32 %a) {
%b = icmp sgt i32 %a, -1
%abs = select i1 %b, i32 %a, i32 %tmp1neg
ret i32 %abs
-; CHECK: movs r0, r0
+; CHECK: cmp
; CHECK: rsbmi r0, r0, #0
; CHECK: bx lr
}
+
+; rdar://11633193
+;; 3 instructions will be generated for abs(a-b):
+;; subs
+;; rsbmi
+;; bx
+define i32 @test2(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK: test2
+; CHECK: subs
+; CHECK-NEXT: rsbmi
+; CHECK-NEXT: bx
+ %sub = sub nsw i32 %a, %b
+ %cmp = icmp sgt i32 %sub, -1
+ %sub1 = sub nsw i32 0, %sub
+ %cond = select i1 %cmp, i32 %sub, i32 %sub1
+ ret i32 %cond
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 3f8fd75..73b546d 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
-; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -check-prefix=M3
; rdar://6949835
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
@@ -18,7 +18,6 @@ entry:
; M3: t:
; M3-NOT: ldrd
-; M3: ldm.w r2, {r2, r3}
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
diff --git a/test/CodeGen/ARM/lsr-scale-addr-mode.ll b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
index 8130019..0c8d387 100644
--- a/test/CodeGen/ARM/lsr-scale-addr-mode.ll
+++ b/test/CodeGen/ARM/lsr-scale-addr-mode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | grep lsl | grep -F {lsl #2\]}
+; RUN: llc < %s -march=arm | grep lsl | grep -F "lsl #2]"
; Should use scaled addressing mode.
define void @sintzero(i32* %a) nounwind {
diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll
index 991d728..bbedea1 100644
--- a/test/CodeGen/ARM/movt-movw-global.ll
+++ b/test/CodeGen/ARM/movt-movw-global.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=armv7-eabi | FileCheck %s -check-prefix=EABI
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=pic | FileCheck %s -check-prefix=IOS-PIC
-; RUN: llc < %s -mtriple=armv7-apple-ios -relocation-model=static | FileCheck %s -check-prefix=IOS-STATIC
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-eabi | FileCheck %s -check-prefix=EABI
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=IOS
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=pic | FileCheck %s -check-prefix=IOS-PIC
+; RUN: llc < %s -verify-machineinstrs -mtriple=armv7-apple-ios -relocation-model=static | FileCheck %s -check-prefix=IOS-STATIC
@foo = common global i32 0
diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll
index de48fee..4a82c36 100644
--- a/test/CodeGen/ARM/neon_div.ll
+++ b/test/CodeGen/ARM/neon_div.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source -disable-post-ra | FileCheck %s
define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32
-;CHECK: vrecpe.f32
;CHECK: vmovn.i32
+;CHECK: vrecpe.f32
;CHECK: vmovn.i32
;CHECK: vmovn.i16
%tmp1 = load <8 x i8>* %A
@@ -15,10 +15,10 @@ define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
define <8 x i8> @udivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32
;CHECK: vrecps.f32
+;CHECK: vmovn.i32
;CHECK: vrecpe.f32
;CHECK: vrecps.f32
;CHECK: vmovn.i32
-;CHECK: vmovn.i32
;CHECK: vqmovun.s16
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll
index b4da552..df98e23 100644
--- a/test/CodeGen/ARM/opt-shuff-tstore.ll
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -mattr=+neon < %s | FileCheck %s
; CHECK: func_4_8
; CHECK: vst1.32
diff --git a/test/CodeGen/ARM/pr13249.ll b/test/CodeGen/ARM/pr13249.ll
new file mode 100644
index 0000000..4bc8810
--- /dev/null
+++ b/test/CodeGen/ARM/pr13249.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple armv7--linux-gnueabi
+
+define arm_aapcscc i8* @__strtok_r_1c(i8* %arg, i8 signext %arg1, i8** nocapture %arg2) nounwind {
+bb:
+ br label %bb3
+
+bb3: ; preds = %bb3, %bb
+ %tmp = phi i8* [ %tmp5, %bb3 ], [ %arg, %bb ]
+ %tmp4 = load i8* %tmp, align 1
+ %tmp5 = getelementptr inbounds i8* %tmp, i32 1
+ br i1 undef, label %bb3, label %bb7
+
+bb7: ; preds = %bb13, %bb3
+ %tmp8 = phi i8 [ %tmp14, %bb13 ], [ %tmp4, %bb3 ]
+ %tmp9 = phi i8* [ %tmp12, %bb13 ], [ %tmp, %bb3 ]
+ %tmp10 = icmp ne i8 %tmp8, %arg1
+ %tmp12 = getelementptr inbounds i8* %tmp9, i32 1
+ br i1 %tmp10, label %bb13, label %bb15
+
+bb13: ; preds = %bb7
+ %tmp14 = load i8* %tmp12, align 1
+ br label %bb7
+
+bb15: ; preds = %bb7
+ store i8* %tmp9, i8** %arg2, align 4
+ ret i8* %tmp
+}
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 3e07da8..418d4f3 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -113,3 +113,29 @@ entry:
call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize
ret void
}
+
+; CHECK: f10
+define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatsisf
+ %1 = icmp eq i32 %a, %b
+ %2 = zext i1 %1 to i32
+ %3 = sitofp i32 %2 to float
+ ret float %3
+}
+
+; CHECK: f11
+define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatsisf
+ %1 = icmp eq i32 %a, %b
+ %2 = sitofp i1 %1 to float
+ ret float %2
+}
+
+; CHECK: f12
+define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp {
+; CHECK-NOT: floatunsisf
+ %1 = icmp eq i32 %a, %b
+ %2 = uitofp i1 %1 to float
+ ret float %2
+}
+
diff --git a/test/CodeGen/ARM/smml.ll b/test/CodeGen/ARM/smml.ll
new file mode 100644
index 0000000..99df0d4
--- /dev/null
+++ b/test/CodeGen/ARM/smml.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+define i32 @f(i32 %a, i32 %b, i32 %c) nounwind readnone ssp {
+entry:
+; CHECK-NOT: smmls
+ %conv4 = zext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %conv2 = sext i32 %c to i64
+ %mul = mul nsw i64 %conv2, %conv1
+ %shr5 = lshr i64 %mul, 32
+ %sub = sub nsw i64 %conv4, %shr5
+ %conv3 = trunc i64 %sub to i32
+ ret i32 %conv3
+}
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 983ba45..5ce2bce 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,13 +1,12 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
-
-; The greedy register allocator uses a single CSR here, invalidating the test.
+; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
-; CHECK: push {lr}
-; CHECK: pop {lr}
+; CHECK: push {r4, r5, lr}
+; CHECK: pop {r4, r5, pc}
+ call void asm sideeffect "", "~{r4},~{r5}"() nounwind
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
%2 = mul i64 %1, %a
diff --git a/test/CodeGen/ARM/str_pre.ll b/test/CodeGen/ARM/str_pre.ll
index e56e3f2..d8b3f0e 100644
--- a/test/CodeGen/ARM/str_pre.ll
+++ b/test/CodeGen/ARM/str_pre.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm | \
-; RUN: grep {str.*\\!} | count 2
+; RUN: grep "str.*\!" | count 2
define void @test1(i32* %X, i32* %A, i32** %dest) {
%B = load i32* %A ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll
new file mode 100644
index 0000000..99ba475
--- /dev/null
+++ b/test/CodeGen/ARM/struct_byval.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
+
+; rdar://9877866
+%struct.SmallStruct = type { i32, [8 x i32], [37 x i8] }
+%struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] }
+
+define i32 @f() nounwind ssp {
+entry:
+; CHECK: f:
+; CHECK: ldr
+; CHECK: str
+; CHECK-NOT:bne
+ %st = alloca %struct.SmallStruct, align 4
+ %call = call i32 @e1(%struct.SmallStruct* byval %st)
+ ret i32 0
+}
+
+; Generate a loop for large struct byval
+define i32 @g() nounwind ssp {
+entry:
+; CHECK: g:
+; CHECK: ldr
+; CHECK: sub
+; CHECK: str
+; CHECK: bne
+ %st = alloca %struct.LargeStruct, align 4
+ %call = call i32 @e2(%struct.LargeStruct* byval %st)
+ ret i32 0
+}
+
+; Generate a loop using NEON instructions
+define i32 @h() nounwind ssp {
+entry:
+; CHECK: h:
+; CHECK: vld1
+; CHECK: sub
+; CHECK: vst1
+; CHECK: bne
+ %st = alloca %struct.LargeStruct, align 16
+ %call = call i32 @e3(%struct.LargeStruct* byval align 16 %st)
+ ret i32 0
+}
+
+declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind
+declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind
+declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind
diff --git a/test/CodeGen/ARM/sub-cmp-peephole.ll b/test/CodeGen/ARM/sub-cmp-peephole.ll
new file mode 100644
index 0000000..6fcbdee
--- /dev/null
+++ b/test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
+
+define i32 @f(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: f:
+; CHECK: subs
+; CHECK-NOT: cmp
+ %cmp = icmp sgt i32 %a, %b
+ %sub = sub nsw i32 %a, %b
+ %sub. = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %sub.
+}
+
+define i32 @g(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: g:
+; CHECK: subs
+; CHECK-NOT: cmp
+ %cmp = icmp slt i32 %a, %b
+ %sub = sub nsw i32 %b, %a
+ %sub. = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %sub.
+}
+
+define i32 @h(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: h:
+; CHECK: subs
+; CHECK-NOT: cmp
+ %cmp = icmp sgt i32 %a, 3
+ %sub = sub nsw i32 %a, 3
+ %sub. = select i1 %cmp, i32 %sub, i32 %b
+ ret i32 %sub.
+}
+
+; rdar://11725965
+define i32 @i(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+; CHECK: i:
+; CHECK: subs
+; CHECK-NOT: cmp
+ %cmp = icmp ult i32 %a, %b
+ %sub = sub i32 %b, %a
+ %sub. = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %sub.
+}
+; If CPSR is live-out, we can't remove cmp if there exists
+; a swapped sub.
+define i32 @j(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: j:
+; CHECK: sub
+; CHECK: cmp
+ %cmp = icmp eq i32 %b, %a
+ %sub = sub nsw i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %cmp2 = icmp sgt i32 %b, %a
+ %sel = select i1 %cmp2, i32 %sub, i32 %a
+ ret i32 %sel
+
+if.else:
+ ret i32 %sub
+}
diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll
index 06ea703..474043a 100644
--- a/test/CodeGen/ARM/sub.ll
+++ b/test/CodeGen/ARM/sub.ll
@@ -36,3 +36,15 @@ entry:
%sel = select i1 %cmp, i32 1, i32 %sub
ret i32 %sel
}
+
+; rdar://11726136
+define i32 @f5(i32 %x) {
+entry:
+; CHECK: f5
+; CHECK: movw r1, #65535
+; CHECK-NOT: movt
+; CHECK-NOT: add
+; CHECK: sub r0, r0, r1
+ %sub = add i32 %x, -65535
+ ret i32 %sub
+}
diff --git a/test/CodeGen/ARM/thread_pointer.ll b/test/CodeGen/ARM/thread_pointer.ll
index 3143387..c403fa5 100644
--- a/test/CodeGen/ARM/thread_pointer.ll
+++ b/test/CodeGen/ARM/thread_pointer.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {__aeabi_read_tp}
+; RUN: grep "__aeabi_read_tp"
define i8* @test() {
entry:
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index 28fd469..a25352c 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -3,10 +3,10 @@
define i32 @test(i32 %a, i32 %b) {
entry:
-; CHECK: movs.w
+; CHECK: cmp
; CHECK-NEXT: it mi
; CHECK-NEXT: rsbmi
-; CHECK-NEXT: movs.w
+; CHECK-NEXT: cmp
; CHECK-NEXT: it mi
; CHECK-NEXT: rsbmi
%cmp1 = icmp slt i32 %a, 0
diff --git a/test/CodeGen/ARM/tls-models.ll b/test/CodeGen/ARM/tls-models.ll
new file mode 100644
index 0000000..a5f3c90
--- /dev/null
+++ b/test/CodeGen/ARM/tls-models.ll
@@ -0,0 +1,117 @@
+; RUN: llc -march=arm -mtriple=arm-linux-gnueabi < %s | FileCheck -check-prefix=CHECK-NONPIC %s
+; RUN: llc -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic < %s | FileCheck -check-prefix=CHECK-PIC %s
+
+
+@external_gd = external thread_local global i32
+@internal_gd = internal thread_local global i32 42
+
+@external_ld = external thread_local(localdynamic) global i32
+@internal_ld = internal thread_local(localdynamic) global i32 42
+
+@external_ie = external thread_local(initialexec) global i32
+@internal_ie = internal thread_local(initialexec) global i32 42
+
+@external_le = external thread_local(localexec) global i32
+@internal_le = internal thread_local(localexec) global i32 42
+
+; ----- no model specified -----
+
+define i32* @f1() {
+entry:
+ ret i32* @external_gd
+
+ ; Non-PIC code can use initial-exec, PIC code has to use general dynamic.
+ ; CHECK-NONPIC: f1:
+ ; CHECK-NONPIC: external_gd(gottpoff)
+ ; CHECK-PIC: f1:
+ ; CHECK-PIC: external_gd(tlsgd)
+}
+
+define i32* @f2() {
+entry:
+ ret i32* @internal_gd
+
+ ; Non-PIC code can use local exec, PIC code can use local dynamic,
+ ; but that is not implemented, so falls back to general dynamic.
+ ; CHECK-NONPIC: f2:
+ ; CHECK-NONPIC: internal_gd(tpoff)
+ ; CHECK-PIC: f2:
+ ; CHECK-PIC: internal_gd(tlsgd)
+}
+
+
+; ----- localdynamic specified -----
+
+define i32* @f3() {
+entry:
+ ret i32* @external_ld
+
+ ; Non-PIC code can use initial exec, PIC should use local dynamic,
+  ; but that is not implemented, so it falls back to general dynamic.
+ ; CHECK-NONPIC: f3:
+ ; CHECK-NONPIC: external_ld(gottpoff)
+ ; CHECK-PIC: f3:
+ ; CHECK-PIC: external_ld(tlsgd)
+}
+
+define i32* @f4() {
+entry:
+ ret i32* @internal_ld
+
+ ; Non-PIC code can use local exec, PIC code can use local dynamic,
+ ; but that is not implemented, so it falls back to general dynamic.
+ ; CHECK-NONPIC: f4:
+ ; CHECK-NONPIC: internal_ld(tpoff)
+ ; CHECK-PIC: f4:
+ ; CHECK-PIC: internal_ld(tlsgd)
+}
+
+
+; ----- initialexec specified -----
+
+define i32* @f5() {
+entry:
+ ret i32* @external_ie
+
+ ; Non-PIC and PIC code will use initial exec as specified.
+ ; CHECK-NONPIC: f5:
+ ; CHECK-NONPIC: external_ie(gottpoff)
+ ; CHECK-PIC: f5:
+ ; CHECK-PIC: external_ie(gottpoff)
+}
+
+define i32* @f6() {
+entry:
+ ret i32* @internal_ie
+
+  ; Non-PIC code can use local exec, PIC code uses initial exec as specified.
+ ; CHECK-NONPIC: f6:
+ ; CHECK-NONPIC: internal_ie(tpoff)
+ ; CHECK-PIC: f6:
+ ; CHECK-PIC: internal_ie(gottpoff)
+}
+
+
+; ----- localexec specified -----
+
+define i32* @f7() {
+entry:
+ ret i32* @external_le
+
+ ; Non-PIC and PIC code will use local exec as specified.
+ ; CHECK-NONPIC: f7:
+ ; CHECK-NONPIC: external_le(tpoff)
+ ; CHECK-PIC: f7:
+ ; CHECK-PIC: external_le(tpoff)
+}
+
+define i32* @f8() {
+entry:
+ ret i32* @internal_le
+
+ ; Non-PIC and PIC code will use local exec as specified.
+ ; CHECK-NONPIC: f8:
+ ; CHECK-NONPIC: internal_le(tpoff)
+ ; CHECK-PIC: f8:
+ ; CHECK-PIC: internal_le(tpoff)
+}
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index 1087094..ec4278c 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {i(tpoff)}
+; RUN: grep "i(tpoff)"
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {__aeabi_read_tp}
+; RUN: grep "__aeabi_read_tp"
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
-; RUN: -relocation-model=pic | grep {__tls_get_addr}
+; RUN: -relocation-model=pic | grep "__tls_get_addr"
@i = thread_local global i32 15 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/ARM/tls3.ll b/test/CodeGen/ARM/tls3.ll
index df7a4ca..e0e944f 100644
--- a/test/CodeGen/ARM/tls3.ll
+++ b/test/CodeGen/ARM/tls3.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN: grep {tbss}
+; RUN: grep "tbss"
%struct.anon = type { i32, i32 }
@teste = internal thread_local global %struct.anon zeroinitializer ; <%struct.anon*> [#uses=1]
diff --git a/test/CodeGen/ARM/twoaddrinstr.ll b/test/CodeGen/ARM/twoaddrinstr.ll
new file mode 100644
index 0000000..4e227dd
--- /dev/null
+++ b/test/CodeGen/ARM/twoaddrinstr.ll
@@ -0,0 +1,21 @@
+; Tests for the two-address instruction pass.
+; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck %s
+
+define void @PR13378() nounwind {
+; This was originally a crasher when trying to schedule the instructions.
+; CHECK: PR13378:
+; CHECK: vldmia
+; CHECK-NEXT: vmov.f32
+; CHECK-NEXT: vstmia
+; CHECK-NEXT: vstmia
+; CHECK-NEXT: vmov.f32
+; CHECK-NEXT: vstmia
+
+entry:
+ %0 = load <4 x float>* undef
+ store <4 x float> zeroinitializer, <4 x float>* undef
+ store <4 x float> %0, <4 x float>* undef
+ %1 = insertelement <4 x float> %0, float 1.000000e+00, i32 3
+ store <4 x float> %1, <4 x float>* undef
+ unreachable
+}
diff --git a/test/CodeGen/ARM/unsafe-fsub.ll b/test/CodeGen/ARM/unsafe-fsub.ll
new file mode 100644
index 0000000..3a4477d
--- /dev/null
+++ b/test/CodeGen/ARM/unsafe-fsub.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=arm -mcpu=cortex-a9 < %s | FileCheck -check-prefix=SAFE %s
+; RUN: llc -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=FAST %s
+
+target triple = "armv7-apple-ios"
+
+; SAFE: test
+; FAST: test
+define float @test(float %x, float %y) {
+entry:
+; SAFE: vmul.f32
+; SAFE: vsub.f32
+; FAST: mov r0, #0
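+; Folding x*y - x*y to zero is only legal with unsafe-fp-math, since the
+; product may be NaN or infinity.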
+ %0 = fmul float %x, %y
+ %1 = fsub float %0, %0
+ ret float %1
+}
+
+
diff --git a/test/CodeGen/ARM/vcnt.ll b/test/CodeGen/ARM/vcnt.ll
index 450f90d..9f55c24 100644
--- a/test/CodeGen/ARM/vcnt.ll
+++ b/test/CodeGen/ARM/vcnt.ll
@@ -1,79 +1,80 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; NB: this tests vcnt, vclz, and vcls
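+; The target-independent ctpop/ctlz intrinsics should still select to the
+; NEON vcnt/vclz instructions.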
define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
;CHECK: vcnt8:
-;CHECK: vcnt.8
+;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
%tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
+ %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
ret <8 x i8> %tmp2
}
define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
;CHECK: vcntQ8:
-;CHECK: vcnt.8
+;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
%tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8> %tmp1)
+ %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
ret <16 x i8> %tmp2
}
-declare <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8>) nounwind readnone
-declare <16 x i8> @llvm.arm.neon.vcnt.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
;CHECK: vclz8:
-;CHECK: vclz.i8
+;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
%tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8> %tmp1)
+ %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
ret <8 x i8> %tmp2
}
define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
;CHECK: vclz16:
-;CHECK: vclz.i16
+;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
%tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
+ %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
ret <4 x i16> %tmp2
}
define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
;CHECK: vclz32:
-;CHECK: vclz.i32
+;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
%tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32> %tmp1)
+ %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
ret <2 x i32> %tmp2
}
define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
;CHECK: vclzQ8:
-;CHECK: vclz.i8
+;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
%tmp1 = load <16 x i8>* %A
- %tmp2 = call <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8> %tmp1)
+ %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
ret <16 x i8> %tmp2
}
define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
;CHECK: vclzQ16:
-;CHECK: vclz.i16
+;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
%tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16> %tmp1)
+ %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
ret <8 x i16> %tmp2
}
define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
;CHECK: vclzQ32:
-;CHECK: vclz.i32
+;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
%tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32> %tmp1)
+ %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
ret <4 x i32> %tmp2
}
-declare <8 x i8> @llvm.arm.neon.vclz.v8i8(<8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vclz.v2i32(<2 x i32>) nounwind readnone
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
+declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
-declare <16 x i8> @llvm.arm.neon.vclz.v16i8(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vclz.v8i16(<8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>) nounwind readnone
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
;CHECK: vclss8:
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
index 1ec36da..8fd3db2 100644
--- a/test/CodeGen/ARM/vector-extend-narrow.ll
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -20,7 +20,9 @@ define float @f(<4 x i16>* nocapture %in) {
; CHECK: g:
define float @g(<4 x i8>* nocapture %in) {
- ; CHECK: vldr
+; Note: vld1 here is reasonably important. Mixing VFP and NEON
+; instructions is bad on some cores.
+ ; CHECK: vld1
; CHECK: vmovl.u8
; CHECK: vmovl.u16
%1 = load <4 x i8>* %in
@@ -47,7 +49,9 @@ define <4 x i8> @h(<4 x float> %v) {
; CHECK: i:
define <4 x i8> @i(<4 x i8>* %x) {
- ; CHECK: vldr
+; Note: vld1 here is reasonably important. Mixing VFP and NEON
+; instructions is bad on some cores.
+ ; CHECK: vld1
; CHECK: vmovl.s8
; CHECK: vmovl.s16
; CHECK: vrecpe
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index 61d73c1..c69473f 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -75,12 +75,12 @@ define <8 x i8> @vld2dupi8(i8* %A) nounwind {
ret <8 x i8> %tmp5
}
-define <4 x i16> @vld2dupi16(i16* %A) nounwind {
+define <4 x i16> @vld2dupi16(i8* %A) nounwind {
;CHECK: vld2dupi16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld2.16 {d16[], d17[]}, [r0]
- %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
@@ -94,7 +94,8 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
;CHECK: vld2dupi16_update:
;CHECK: vld2.16 {d16[], d17[]}, [r1]!
%A = load i16** %ptr
- %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
+ %A2 = bitcast i16* %A to i8*
+ %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
@@ -105,11 +106,11 @@ define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
ret <4 x i16> %tmp5
}
-define <2 x i32> @vld2dupi32(i32* %A) nounwind {
+define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld2.32 {d16[], d17[]}, [r0, :64]
- %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
+ %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
%tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
@@ -119,8 +120,8 @@ define <2 x i32> @vld2dupi32(i32* %A) nounwind {
}
declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i16*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i32*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -144,11 +145,11 @@ define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
ret <8 x i8> %tmp8
}
-define <4 x i16> @vld3dupi16(i16* %A) nounwind {
+define <4 x i16> @vld3dupi16(i8* %A) nounwind {
;CHECK: vld3dupi16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
- %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
+ %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
@@ -161,7 +162,7 @@ define <4 x i16> @vld3dupi16(i16* %A) nounwind {
}
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
@@ -171,7 +172,8 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
;CHECK: vld4dupi16_update:
;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
%A = load i16** %ptr
- %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
+ %A2 = bitcast i16* %A to i8*
+ %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
%tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
@@ -188,12 +190,12 @@ define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
ret <4 x i16> %tmp11
}
-define <2 x i32> @vld4dupi32(i32* %A) nounwind {
+define <2 x i32> @vld4dupi32(i8* %A) nounwind {
;CHECK: vld4dupi32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0, :64]
- %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
+ %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
%tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
@@ -208,5 +210,5 @@ define <2 x i32> @vld4dupi32(i32* %A) nounwind {
ret <2 x i32> %tmp11
}
-declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i16*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i32*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 61d89bb..74628f0 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -525,3 +525,77 @@ define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
%3 = extractelement <8 x i16> %2, i32 0
ret i16 %3
}
+
+; A constant build_vector created for a vmull with half-width elements must
+; not introduce illegal types. <rdar://problem/11324364>
+define void @vmull_buildvector() nounwind optsize ssp align 2 {
+; CHECK: vmull_buildvector
+entry:
+ br i1 undef, label %for.end179, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.cond.loopexit: ; preds = %for.body33, %for.body
+ br i1 undef, label %for.end179, label %for.body
+
+for.body: ; preds = %for.cond.loopexit, %for.body.lr.ph
+ br i1 undef, label %for.cond.loopexit, label %for.body33.lr.ph
+
+for.body33.lr.ph: ; preds = %for.body
+ %.sub = select i1 undef, i32 0, i32 undef
+ br label %for.body33
+
+for.body33: ; preds = %for.body33, %for.body33.lr.ph
+ %add45 = add i32 undef, undef
+ %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1)
+ %0 = load i32** undef, align 4
+ %shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8>
+ %vmovl.i249 = zext <8 x i8> %1 to <8 x i16>
+ %shuffle.i246 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i240 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> <i32 1>
+ %2 = bitcast <1 x i64> %shuffle.i240 to <8 x i8>
+ %3 = bitcast <16 x i8> undef to <2 x i64>
+ %vmovl.i237 = zext <8 x i8> undef to <8 x i16>
+ %shuffle.i234 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i226 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %vmovl.i225 = zext <8 x i8> undef to <8 x i16>
+ %mul.i223 = mul <8 x i16> %vmovl.i249, %vmovl.i249
+ %vshl_n = shl <8 x i16> %mul.i223, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ %vqsub2.i216 = tail call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>, <8 x i16> %vshl_n) nounwind
+ %mul.i209 = mul <8 x i16> undef, <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>
+ %vshr_n130 = lshr <8 x i16> undef, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %vshr_n134 = lshr <8 x i16> %mul.i209, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %sub.i205 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n130
+ %sub.i203 = sub <8 x i16> <i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80, i16 80>, %vshr_n134
+ %add.i200 = add <8 x i16> %sub.i205, <i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96, i16 96>
+ %add.i198 = add <8 x i16> %add.i200, %sub.i203
+ %mul.i194 = mul <8 x i16> %add.i198, %vmovl.i237
+ %mul.i191 = mul <8 x i16> %vshr_n130, undef
+ %add.i192 = add <8 x i16> %mul.i191, %mul.i194
+ %mul.i187 = mul <8 x i16> %vshr_n134, undef
+ %add.i188 = add <8 x i16> %mul.i187, %add.i192
+ %mul.i185 = mul <8 x i16> undef, undef
+ %add.i186 = add <8 x i16> %mul.i185, undef
+ %vrshr_n160 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i188, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
+ %vrshr_n163 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i186, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>)
+ %mul.i184 = mul <8 x i16> undef, %vrshr_n160
+ %mul.i181 = mul <8 x i16> undef, %vmovl.i225
+ %add.i182 = add <8 x i16> %mul.i181, %mul.i184
+ %vrshr_n170 = tail call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %add.i182, <8 x i16> <i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7, i16 -7>)
+ %vqmovn1.i180 = tail call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %vrshr_n170) nounwind
+ %4 = bitcast <8 x i8> %vqmovn1.i180 to <1 x i64>
+ %shuffle.i = shufflevector <1 x i64> %4, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %5 = bitcast <2 x i64> %shuffle.i to <16 x i8>
+ store <16 x i8> %5, <16 x i8>* undef, align 16
+ %add177 = add nsw i32 undef, 16
+ br i1 undef, label %for.body33, label %for.cond.loopexit
+
+for.end179: ; preds = %for.cond.loopexit, %entry
+ ret void
+}
+
+declare <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index e3372a0..f117ab2 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -fast-isel=0 -O0 | FileCheck %s
define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3i8: