diff options
Diffstat (limited to 'test/CodeGen/Thumb2')
-rw-r--r-- | test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll | 26 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll | 106 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll | 11 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll | 5 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/buildvector-crash.ll | 17 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/cortex-fp.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/cross-rc-coalescing-2.ll | 15 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/large-stack.ll | 2 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/ldr-str-imm12.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/lsr-deficiency.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/machine-licm-vdup.ll | 38 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/machine-licm.ll | 8 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/thumb2-badreg-operands.ll | 15 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/thumb2-barrier.ll | 32 | ||||
-rw-r--r-- | test/CodeGen/Thumb2/thumb2-spill-q.ll | 31 |
17 files changed, 236 insertions, 90 deletions
diff --git a/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll new file mode 100644 index 0000000..5cfc68d --- /dev/null +++ b/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -post-RA-scheduler + +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-apple-darwin9" + +@.str = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=0] +@.str1 = external constant [31 x i8], align 1 ; <[31 x i8]*> [#uses=1] +@.str2 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1] + +declare i32 @getUnknown(i32, ...) nounwind + +declare void @llvm.va_start(i8*) nounwind + +declare void @llvm.va_end(i8*) nounwind + +declare i32 @printf(i8* nocapture, ...) nounwind + +define i32 @main() nounwind { +entry: + %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; <i32> [#uses=0] + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; <i32> [#uses=0] + %2 = tail call i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; <i32> [#uses=1] + %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0] + ret i32 0 +} diff --git a/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll new file mode 100644 index 0000000..06a152d --- /dev/null +++ b/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll @@ -0,0 +1,106 @@ +; RUN: llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" +target triple = "thumbv7-apple-darwin9" + +@history = internal global [2 x [56 x i32]] [[56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0], [56 x i32] [i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 2, i32 5, i32 8, i32 10, i32 8, i32 5, i32 2, i32 -1, i32 1, i32 3, i32 5, i32 7, i32 5, i32 3, i32 1, i32 -1, i32 0, i32 1, i32 2, i32 4, i32 2, i32 1, i32 0]] ; <[2 x [56 x i32]]*> [#uses=3] +@nodes = internal global i64 0 ; <i64*> [#uses=4] +@.str = private constant [9 x i8] c"##-<=>+#\00", align 1 ; <[9 x i8]*> [#uses=2] +@.str1 = private constant [6 x i8] c"%c%d\0A\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str2 = private constant [16 x i8] c"Fhourstones 2.0\00", align 1 ; <[16 x i8]*> [#uses=1] +@.str3 = private constant [54 x i8] c"Using %d transposition table entries with %d probes.\0A\00", align 1 ; <[54 x i8]*> [#uses=1] +@.str4 = private constant [31 x i8] c"Solving %d-ply position after \00", align 1 ; <[31 x i8]*> [#uses=1] +@.str5 = private constant [7 x i8] c" . . .\00", align 1 ; <[7 x i8]*> [#uses=1] +@.str6 = private constant [28 x i8] c"score = %d (%c) work = %d\0A\00", align 1 ; <[28 x i8]*> [#uses=1] +@.str7 = private constant [36 x i8] c"%lu pos / %lu msec = %.1f Kpos/sec\0A\00", align 1 ; <[36 x i8]*> [#uses=1] +@plycnt = internal global i32 0 ; <i32*> [#uses=21] +@dias = internal global [19 x i32] zeroinitializer ; <[19 x i32]*> [#uses=43] +@columns = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=18] +@height = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=21] +@rows = internal global [8 x i32] zeroinitializer ; <[8 x i32]*> [#uses=20] +@colthr = internal global [128 x i32] zeroinitializer ; <[128 x i32]*> [#uses=5] +@moves = internal global [44 x i32] zeroinitializer ; <[44 x i32]*> [#uses=9] +@.str8 = private constant [3 x i8] c"%d\00", align 1 ; <[3 x i8]*> [#uses=1] +@he = internal global i8* null ; <i8**> [#uses=9] +@hits = internal global i64 0 ; <i64*> [#uses=8] +@posed = internal global i64 0 ; <i64*> [#uses=7] +@ht = internal global i32* null ; <i32**> [#uses=5] +@.str16 = private constant [19 x i8] c"store rate = %.3f\0A\00", align 1 ; <[19 x i8]*> [#uses=1] +@.str117 = private constant [45 x i8] c"- %5.3f < %5.3f = %5.3f > %5.3f + %5.3f\0A\00", align 1 ; <[45 x i8]*> [#uses=1] +@.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1] + +declare i32 @puts(i8* nocapture) nounwind + +declare i32 @getchar() nounwind + +define internal i32 @transpose() nounwind readonly { +; CHECK: push +entry: + %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1] + %1 = shl i32 %0, 7 ; <i32> [#uses=1] + %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1] + %3 = or i32 %1, %2 ; <i32> [#uses=1] + %4 = shl i32 %3, 7 ; <i32> [#uses=1] + %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1] + %6 = or i32 %4, %5 ; <i32> [#uses=3] + %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1] + %8 = shl i32 %7, 7 ; <i32> [#uses=1] + %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1] + %10 = or i32 %8, %9 ; <i32> [#uses=1] + %11 = shl i32 %10, 7 ; <i32> [#uses=1] + %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1] + %13 = or i32 %11, %12 ; <i32> [#uses=3] + %14 = icmp ugt i32 %6, %13 ; <i1> [#uses=2] + %.pn2.in.i = select i1 %14, i32 %6, i32 %13 ; <i32> [#uses=1] + %.pn1.in.i = select i1 %14, i32 %13, i32 %6 ; <i32> [#uses=1] + %.pn2.i = shl i32 %.pn2.in.i, 7 ; <i32> [#uses=1] + %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1] + %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i ; <i32> [#uses=1] + %.pn.in.i = zext i32 %.pn.in.in.i to i64 ; <i64> [#uses=1] + %.pn.i = shl i64 %.pn.in.i, 21 ; <i64> [#uses=1] + %.pn1.i = zext i32 %.pn1.in.i to i64 ; <i64> [#uses=1] + %iftmp.22.0.i = or i64 %.pn.i, %.pn1.i ; <i64> [#uses=2] + %15 = lshr i64 %iftmp.22.0.i, 17 ; <i64> [#uses=1] + %16 = trunc i64 %15 to i32 ; <i32> [#uses=2] + %17 = urem i64 %iftmp.22.0.i, 1050011 ; <i64> [#uses=1] + %18 = trunc i64 %17 to i32 ; <i32> [#uses=1] + %19 = urem i32 %16, 179 ; <i32> [#uses=1] + %20 = or i32 %19, 131072 ; <i32> [#uses=1] + %21 = load i32** @ht, align 4 ; <i32*> [#uses=1] + br label %bb5 + +bb: ; preds = %bb5 + %22 = getelementptr inbounds i32* %21, i32 %x.0 ; <i32*> [#uses=1] + %23 = load i32* %22, align 4 ; <i32> [#uses=1] + %24 = icmp eq i32 %23, %16 ; <i1> [#uses=1] + br i1 %24, label %bb1, label %bb2 + +bb1: ; preds = %bb + %25 = load i8** @he, align 4 ; <i8*> [#uses=1] + %26 = getelementptr inbounds i8* %25, i32 %x.0 ; <i8*> [#uses=1] + %27 = load i8* %26, align 1 ; <i8> [#uses=1] + %28 = sext i8 %27 to i32 ; <i32> [#uses=1] + ret i32 %28 + +bb2: ; preds = %bb + %29 = add nsw i32 %20, %x.0 ; <i32> [#uses=3] + %30 = add i32 %29, -1050011 ; <i32> [#uses=1] + %31 = icmp sgt i32 %29, 1050010 ; <i1> [#uses=1] + %. = select i1 %31, i32 %30, i32 %29 ; <i32> [#uses=1] + %32 = add i32 %33, 1 ; <i32> [#uses=1] + br label %bb5 + +bb5: ; preds = %bb2, %entry + %33 = phi i32 [ 0, %entry ], [ %32, %bb2 ] ; <i32> [#uses=2] + %x.0 = phi i32 [ %18, %entry ], [ %., %bb2 ] ; <i32> [#uses=3] + %34 = icmp sgt i32 %33, 7 ; <i1> [#uses=1] + br i1 %34, label %bb7, label %bb + +bb7: ; preds = %bb5 + ret i32 -128 +} + +declare noalias i8* @calloc(i32, i32) nounwind + +declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll index 98a5263..45d356c 100644 --- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll +++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll @@ -11,8 +11,8 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) { ; CHECK: _ZNKSs7compareERKSs: ; CHECK: it eq -; CHECK-NEXT: subeq.w r0, r6, r8 -; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc} +; CHECK-NEXT: subeq r0, r6, r7 +; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc} entry: %0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3] %1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3] diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll index e0946c7..2246de3 100644 --- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll +++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll @@ -7,19 +7,12 @@ define void @t() nounwind ssp { entry: ; CHECK: t: -; CHECK: push {r4, r7} -; CHECK: mov r0, sp -; CHECK: add r7, sp, #4 -; CHECK: bic r0, r0, #7 + %size = mul i32 8, 2 ; CHECK: subs r0, #16 ; CHECK: mov sp, r0 -; CHECK: mov r0, sp -; CHECK: bic r0, r0, #7 + %vla_a = alloca i8, i32 %size, align 8 ; CHECK: subs r0, #16 ; CHECK: mov sp, r0 - - %size = mul i32 8, 2 - %vla_a = alloca i8, i32 %size, align 8 %vla_b = alloca i8, i32 %size, align 8 unreachable } diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll index 2675006..9ed6a01 100644 --- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll +++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll @@ -23,8 +23,9 @@ entry: %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2] ; Constant pool load followed by add. ; Then clobber the loaded register, not the sum. -; CHECK: vldr.64 [[LDR:d.]] -; CHECK: vadd.f64 [[ADD:d.]], [[LDR]], [[LDR]] +; CHECK: vldr.64 [[LDR:d.*]], +; CHECK: LPC0_0: +; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]] ; CHECK: vmov.f64 [[LDR]] %5 = fadd <2 x double> %3, %3 ; <<2 x double>> [#uses=2] %6 = fadd <2 x double> %4, %4 ; <<2 x double>> [#uses=2] diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll index c5fc509..f91e1c9 100644 --- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll +++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s ; rdar://8115404 ; Tail merging must not split an IT block. diff --git a/test/CodeGen/Thumb2/buildvector-crash.ll b/test/CodeGen/Thumb2/buildvector-crash.ll new file mode 100644 index 0000000..01ef472 --- /dev/null +++ b/test/CodeGen/Thumb2/buildvector-crash.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s +; Formerly crashed, 3573915. + +define void @RotateStarsFP_Vec() nounwind { +bb.nph372: + br label %bb8 + +bb8: ; preds = %bb8, %bb.nph372 + %0 = fadd <4 x float> undef, <float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000, float 0xBFEE353F80000000> + %1 = fmul <4 x float> %0, undef + %2 = fmul <4 x float> %1, undef + %3 = fadd <4 x float> undef, %2 + store <4 x float> %3, <4 x float>* undef, align 4 + br label %bb8 +; CHECK: RotateStarsFP_Vec: +; CHECK: vldmia +} diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll index f7ec5a3..d06f8a7 100644 --- a/test/CodeGen/Thumb2/cortex-fp.ll +++ b/test/CodeGen/Thumb2/cortex-fp.ll @@ -19,6 +19,6 @@ entry: %0 = fmul double %a, %b ; CORTEXM3: blx ___muldf3 ; CORTEXM4: blx ___muldf3 -; CORTEXA8: vmul.f64 d0, d1, d0 +; CORTEXA8: vmul.f64 d16, d17, d16 ret double %0 } diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index 583f405..c169fb3 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,15 +1,23 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | FileCheck %s define void @fht(float* nocapture %fz, i16 signext %n) nounwind { +; CHECK: fht: entry: br label %bb5 bb5: ; preds = %bb5, %entry +; CHECK: %bb5 +; CHECK: bne br i1 undef, label %bb5, label %bb.nph bb.nph: ; preds = %bb5 br label %bb7 +; Loop preheader +; CHECK: vmov.f32 +; CHECK: vsub.f32 +; CHECK: vadd.f32 +; CHECK: vmul.f32 bb7: ; preds = %bb9, %bb.nph %s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3] %tmp79 = add i32 undef, undef ; <i32> [#uses=1] @@ -19,6 +27,9 @@ bb7: ; preds = %bb9, %bb.nph br label %bb8 bb8: ; preds = %bb8, %bb7 +; CHECK: %bb8 +; CHECK-NOT: vmov.f32 +; CHECK: blt %tmp54 = add i32 0, %tmp53 ; <i32> [#uses=0] %fi.1 = getelementptr float* %fz, i32 undef ; <float*> [#uses=2] %tmp80 = add i32 0, %tmp79 ; <i32> [#uses=1] @@ -62,6 +73,8 @@ bb8: ; preds = %bb8, %bb7 br i1 %34, label %bb8, label %bb9 bb9: ; preds = %bb8 +; CHECK: %bb9 +; CHECK: vmov.f32 %35 = fadd float 0.000000e+00, undef ; <float> [#uses=1] br label %bb7 } diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll index 9729534..763d316 100644 --- a/test/CodeGen/Thumb2/large-stack.ll +++ b/test/CodeGen/Thumb2/large-stack.ll @@ -27,7 +27,7 @@ define i32 @test3() { ; DARWIN: sub.w sp, sp, #805306368 ; DARWIN: sub sp, #20 ; LINUX: test3: -; LINUX: stmdb sp!, {r4, r7, r11, lr} +; LINUX: push {r4, r7, r11, lr} ; LINUX: sub.w sp, sp, #805306368 ; LINUX: sub sp, #16 %retval = alloca i32, align 4 diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 29b8e75..650d788 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -22,7 +22,7 @@ define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind { entry: -; CHECK: ldr.w r9, [r7, #28] +; CHECK: ldr.w {{(r[0-9])|(lr)}}, [r7, #28] %xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] br label %bb20 @@ -46,9 +46,9 @@ bb119: ; preds = %bb20, %bb20 bb420: ; preds = %bb20, %bb20 ; CHECK: bb420 -; CHECK: str r{{[0-7]}}, [sp] -; CHECK: str r{{[0-7]}}, [sp, #4] -; CHECK: str r{{[0-7]}}, [sp, #8] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4] +; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8] ; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4 diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll index 7fa782f..ad957a1 100644 --- a/test/CodeGen/Thumb2/lsr-deficiency.ll +++ b/test/CodeGen/Thumb2/lsr-deficiency.ll @@ -21,8 +21,8 @@ entry: bb: ; preds = %bb, %entry ; CHECK: LBB0_1: ; CHECK: cmp r2, #0 -; CHECK: sub.w r9, r2, #1 -; CHECK: mov r2, r9 +; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], r2, #1 +; CHECK: mov r2, [[REGISTER]] %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1] %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2] diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll deleted file mode 100644 index fde2ee0..0000000 --- a/test/CodeGen/Thumb2/machine-licm-vdup.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim -arm-vdup-splat | FileCheck %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s -; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375. -; Eventually this should become the default and be moved into machine-licm.ll. -; FIXME: the vdup should be hoisted out of the loop, 8248029. - -define void @t2(i8* %ptr1, i8* %ptr2) nounwind { -entry: -; CHECK: t2: -; CHECK: mov.w r3, #1065353216 - br i1 undef, label %bb1, label %bb2 - -bb1: -; CHECK-NEXT: %bb1 -; CHECK: vdup.32 q1, r3 - %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] - %tmp1 = shl i32 %indvar, 2 - %gep1 = getelementptr i8* %ptr1, i32 %tmp1 - %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1) - %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2) - %gep2 = getelementptr i8* %ptr2, i32 %tmp1 - call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1) - %indvar.next = add i32 %indvar, 1 - %cond = icmp eq i32 %indvar.next, 10 - br i1 %cond, label %bb2, label %bb1 - -bb2: - ret void -} - -; CHECK-NOT: LCPI1_0: -; CHECK: .subsections_via_symbols - -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly - -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind - -declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index b949b2f..14d04a4 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -55,8 +55,8 @@ return: ; preds = %bb, %entry define void @t2(i8* %ptr1, i8* %ptr2) nounwind { entry: ; CHECK: t2: -; CHECK: adr r{{.}}, #LCPI1_0 -; CHECK: vldmia r3, {d0, d1} +; CHECK: mov.w r3, #1065353216 +; CHECK: vdup.32 q{{.*}}, r3 br i1 undef, label %bb1, label %bb2 bb1: @@ -76,8 +76,8 @@ bb2: ret void } -; CHECK: LCPI1_0: -; CHECK: .section +; CHECK-NOT: LCPI1_0: +; CHECK: .subsections_via_symbols declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll deleted file mode 100644 index 4df06b8..0000000 --- a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s - -define void @b(i32 %x) nounwind optsize { -entry: -; CHECK: b -; CHECK: mov r2, sp -; CHECK: mls r0, r0, r1, r2 -; CHECK: mov sp, r0 - %0 = mul i32 %x, 24 ; <i32> [#uses=1] - %vla = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1] - call arm_aapcscc void @a(i8* %vla) nounwind optsize - ret void -} - -declare void @a(i8*) optsize diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll index a54d09e6..93ae7c4 100644 --- a/test/CodeGen/Thumb2/thumb2-barrier.ll +++ b/test/CodeGen/Thumb2/thumb2-barrier.ll @@ -1,17 +1,31 @@ ; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s -declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 ) +declare void @llvm.memory.barrier(i1 , i1 , i1 , i1 , i1) -define void @t1() { -; CHECK: t1: -; CHECK: dsb - call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true ) +define void @t_st() { +; CHECK: t_st: +; CHECK: dmb st + call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 true) ret void } -define void @t2() { -; CHECK: t2: -; CHECK: dmb - call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false ) +define void @t_sy() { +; CHECK: t_sy: +; CHECK: dmb sy + call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 true) + ret void +} + +define void @t_ishst() { +; CHECK: t_ishst: +; CHECK: dmb ishst + call void @llvm.memory.barrier(i1 false, i1 false, i1 false, i1 true, i1 false) + ret void +} + +define void @t_ish() { +; CHECK: t_ish: +; CHECK: dmb ish + call void @llvm.memory.barrier(i1 true, i1 false, i1 false, i1 true, i1 false) ret void } diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 4f92c93..0d73fba 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -20,6 +20,26 @@ entry: %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] store float 0.000000e+00, float* undef, align 4 %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1] + %ld3 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld4 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld5 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld6 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld7 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld8 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld9 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld10 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld11 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 + %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind + store float 0.000000e+00, float* undef, align 4 %val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1] br label %bb4 @@ -44,7 +64,16 @@ bb4: ; preds = %bb193, %entry %18 = fmul <4 x float> %17, %val173 ; <<4 x float>> [#uses=1] %19 = shufflevector <4 x float> %18, <4 x float> undef, <2 x i32> <i32 2, i32 3> ; <<2 x float>> [#uses=1] %20 = shufflevector <2 x float> %19, <2 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] - %21 = fadd <4 x float> zeroinitializer, %20 ; <<4 x float>> [#uses=2] + %tmp1 = fadd <4 x float> %20, %ld3 + %tmp2 = fadd <4 x float> %tmp1, %ld4 + %tmp3 = fadd <4 x float> %tmp2, %ld5 + %tmp4 = fadd <4 x float> %tmp3, %ld6 + %tmp5 = fadd <4 x float> %tmp4, %ld7 + %tmp6 = fadd <4 x float> %tmp5, %ld8 + %tmp7 = fadd <4 x float> %tmp6, %ld9 + %tmp8 = fadd <4 x float> %tmp7, %ld10 + %tmp9 = fadd <4 x float> %tmp8, %ld11 + %21 = fadd <4 x float> %tmp9, %ld12 %22 = fcmp ogt <4 x float> %besterror.0.2264, %21 ; <<4 x i1>> [#uses=0] %tmp = extractelement <4 x i1> %22, i32 0 br i1 %tmp, label %bb193, label %bb186 |