Diffstat (limited to 'test/CodeGen/X86')
65 files changed, 336 insertions, 278 deletions
diff --git a/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll index 0afddd8..69266dc 100644 --- a/test/CodeGen/X86/2006-05-02-InstrSched1.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll @@ -1,7 +1,10 @@ ; REQUIRES: asserts ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \ -; RUN: grep asm-printer | grep 14 +; RUN: grep asm-printer | grep 16 ; +; It's possible to schedule this in 14 instructions by avoiding +; callee-save registers, but the scheduler isn't currently that +; conervative with registers. @size20 = external global i32 ; <i32*> [#uses=1] @in5 = external global i8* ; <i8**> [#uses=1] @@ -21,4 +24,3 @@ define i32 @compare(i8* %a, i8* %b) nounwind { } declare i32 @memcmp(i8*, i8*, i32) - diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll index 24aa5b9..4ec7039 100644 --- a/test/CodeGen/X86/2007-01-08-InstrSched.ll +++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll @@ -13,10 +13,10 @@ define float @foo(float %x) nounwind { ; CHECK: mulss ; CHECK: mulss -; CHECK: addss ; CHECK: mulss -; CHECK: addss ; CHECK: mulss ; CHECK: addss +; CHECK: addss +; CHECK: addss ; CHECK: ret } diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 43c2397..764c2cd 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -17,9 +17,9 @@ bb4: ; preds = %bb.i, %bb26, %bb4, %entry ; CHECK: %bb4 ; CHECK: xorl ; CHECK: callq -; CHECK: movq ; CHECK: xorl ; CHECK: xorl +; CHECK: movq %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0] %ins = or i64 %p, 2097152 ; <i64> [#uses=1] diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll index d4a74c9..060c535 100644 --- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll +++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll @@ -1,9 +1,9 @@ -; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s +; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt -enable-misched=false < %s | FileCheck %s ; Check that lowered argumens do not overwrite the return address before it is moved. ; Bug 6225 ; ; If a call is a fastcc tail call and tail call optimization is enabled, the -; caller frame is replaced by the callee frame. This can require that arguments are +; caller frame is replaced by the callee frame. This can require that arguments are ; placed on the former return address stack slot. Special care needs to be taken ; taken that the return address is moved / or stored in a register before ; lowering of arguments potentially overwrites the value. 
@@ -51,5 +51,3 @@ false: tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind ret void } - - diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll index 1b33977..39d89e3 100644 --- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll +++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll @@ -19,8 +19,8 @@ entry: } ; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) -; CHECK: movb 38(%rsp), [[R0:%.+]] -; CHECK: movb 8(%rsp), [[R1:%.+]] -; CHECK: movb [[R1]], 8(%rsp) -; CHECK: movb [[R0]], 38(%rsp) +; CHECK: movb (%rsp), [[R1:%.+]] +; CHECK: movb 30(%rsp), [[R0:%.+]] +; CHECK: movb [[R1]], (%rsp) +; CHECK: movb [[R0]], 30(%rsp) ; CHECK: callq ___stack_chk_fail diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll index da734d4..07a6910 100644 --- a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll +++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll @@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: main define i32 @main() nounwind uwtable { entry: -; CHECK: pmovsxbq j(%rip), % ; CHECK: pmovsxbq i(%rip), % +; CHECK: pmovsxbq j(%rip), % %0 = load <2 x i8>* @i, align 8 %1 = load <2 x i8>* @j, align 8 %div = sdiv <2 x i8> %1, %0 @@ -25,4 +25,3 @@ entry: ret i32 0 ; CHECK: ret } - diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll index 186fafb..16706ae 100644 --- a/test/CodeGen/X86/2012-04-26-sdglue.ll +++ b/test/CodeGen/X86/2012-04-26-sdglue.ll @@ -5,8 +5,8 @@ ; It's hard to test for the ISEL condition because CodeGen optimizes ; away the bugpointed code. Just ensure the basics are still there. ;CHECK-LABEL: func: -;CHECK: vxorps -;CHECK: vinsertf128 +;CHECK: vpxor +;CHECK: vinserti128 ;CHECK: vpshufd ;CHECK: vpshufd ;CHECK: vmulps diff --git a/test/CodeGen/X86/3addr-16bit.ll b/test/CodeGen/X86/3addr-16bit.ll index 77c3c16..fafdfdb 100644 --- a/test/CodeGen/X86/3addr-16bit.ll +++ b/test/CodeGen/X86/3addr-16bit.ll @@ -34,7 +34,8 @@ entry: ; 64BIT-LABEL: t2: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal -1(%rsi), %eax +; 64BIT: decl %eax +; 64BIT: movzwl %ax %0 = icmp eq i16 %k, %c ; <i1> [#uses=1] %1 = add i16 %k, -1 ; <i16> [#uses=3] br i1 %0, label %bb, label %bb1 @@ -58,7 +59,7 @@ entry: ; 64BIT-LABEL: t3: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal 2(%rsi), %eax +; 64BIT: addl $2, %eax %0 = add i16 %k, 2 ; <i16> [#uses=3] %1 = icmp eq i16 %k, %c ; <i1> [#uses=1] br i1 %1, label %bb, label %bb1 @@ -81,7 +82,7 @@ entry: ; 64BIT-LABEL: t4: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal (%rsi,%rdi), %eax +; 64BIT: addl %edi, %eax %0 = add i16 %k, %c ; <i16> [#uses=3] %1 = icmp eq i16 %k, %c ; <i1> [#uses=1] br i1 %1, label %bb, label %bb1 diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll index f1d9296..a8e3537 100644 --- a/test/CodeGen/X86/StackColoring.ll +++ b/test/CodeGen/X86/StackColoring.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -;YESCOLOR: subq $136, %rsp -;NOCOLOR: subq $264, %rsp +;YESCOLOR: subq $144, %rsp +;NOCOLOR: subq $272, %rsp define i32 @myCall_w2(i32 %in) { entry: diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll index 3b84231..633e70f 100644 --- a/test/CodeGen/X86/abi-isel.ll +++ b/test/CodeGen/X86/abi-isel.ll @@ -1,16 +1,16 @@ -; RUN: llc < %s -asm-verbose=0 -mcpu=generic 
-mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC +; RUN: llc < %s 
-asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC @src = external global [131072 x i32] @dst = external global [131072 x i32] diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll index f36577b..62a62a4 100644 --- a/test/CodeGen/X86/add.ll +++ b/test/CodeGen/X86/add.ll @@ -9,7 +9,7 @@ define i32 @test1(i32 inreg %a) nounwind { %b = add i32 %a, 128 ret i32 %b ; X32: subl $-128, %eax -; X64: subl $-128, +; X64: subl $-128, } define i64 @test2(i64 inreg %a) nounwind { %b = add i64 %a, 2147483648 @@ -20,7 +20,7 @@ define i64 @test2(i64 inreg %a) nounwind { define i64 @test3(i64 inreg %a) nounwind { %b = add i64 %a, 128 ret i64 %b - + ; X32: addl $128, %eax ; X64: subq $-128, } @@ -38,7 +38,7 @@ normal: overflow: ret i1 false - + ; X32-LABEL: test4: ; X32: addl ; X32-NEXT: jo @@ -82,11 +82,11 @@ define i64 @test6(i64 %A, i32 %B) nounwind { ret i64 %tmp5 ; X32-LABEL: test6: -; X32: movl 12(%esp), %edx +; X32: movl 4(%esp), %eax +; X32-NEXT: movl 12(%esp), %edx ; X32-NEXT: addl 8(%esp), %edx -; X32-NEXT: movl 4(%esp), %eax ; X32-NEXT: ret - + ; X64-LABEL: test6: ; X64: shlq $32, %r[[A1]] ; X64: leaq (%r[[A1]],%r[[A0]]), %rax diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll index 3d76fb0..74b9470 100644 --- a/test/CodeGen/X86/alloca-align-rounding.ll +++ b/test/CodeGen/X86/alloca-align-rounding.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s declare void @bar(<2 x i64>* %n) diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll index 4aa3370..a9da1ec 100644 --- a/test/CodeGen/X86/avx-arith.ll +++ b/test/CodeGen/X86/avx-arith.ll @@ -240,15 +240,15 @@ define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm -; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm +; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm -; CHECK-NEXT: vpaddq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm @@ -269,4 +269,3 @@ define <4 x float> @int_sqrt_ss() { %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind ret <4 x float> %x2 } - diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll index 0550720..7337815 100644 --- a/test/CodeGen/X86/avx-intel-ocl.ll +++ b/test/CodeGen/X86/avx-intel-ocl.ll @@ -32,7 +32,7 @@ declare i32 @func_int(i32, i32) define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 %x = fadd <16 x float> %a, %b - %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) %2 = load <16 x float>* %y, align 16 %3 = fadd <16 x float> %2, %1 ret <16 x float> %3 @@ -43,21 +43,21 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { ; preserved ymm6-ymm15 ; WIN64: testf16_regs ; WIN64: call -; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0 -; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1 +; WIN64: vaddps 
{{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} +; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; WIN64: ret ; preserved ymm8-ymm15 ; X64: testf16_regs ; X64: call -; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0 -; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1 +; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} +; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; X64: ret define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 %x = fadd <16 x float> %a, %b - %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) %2 = load <16 x float>* %y, align 16 %3 = fadd <16 x float> %1, %b %4 = fadd <16 x float> %2, %3 @@ -166,4 +166,3 @@ entry: %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> ret <8 x float> %8 } - diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll index b0e64d1..0956361 100644 --- a/test/CodeGen/X86/avx-shuffle.ll +++ b/test/CodeGen/X86/avx-shuffle.ll @@ -81,7 +81,7 @@ entry: define i32 @test9(<4 x i32> %a) nounwind { ; CHECK: test9 ; CHECK: vpextrd - %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> + %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4> %r = extractelement <8 x i32> %b, i32 2 ; CHECK: ret ret i32 %r @@ -251,6 +251,8 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind { ; CHECK: swap8doubles ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} +; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}} +; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi) diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index bbad507..ed68ff7 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -167,8 +167,8 @@ define i32 @float_to_int(float %x) { } ; CHECK-LABEL: uitof64 -; CHECK: vextracti64x4 ; CHECK: vcvtudq2pd +; CHECK: vextracti64x4 ; CHECK: vcvtudq2pd ; CHECK: ret define <16 x double> @uitof64(<16 x i32> %a) nounwind { diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index eec8873..ef5cb56 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -27,8 +27,8 @@ define i16 @mand16(i16 %x, i16 %y) { %md = xor <16 x i1> %ma, %mb %me = or <16 x i1> %mc, %md %ret = bitcast <16 x i1> %me to i16 -; CHECK: kxorw ; CHECK: kandw +; CHECK: kxorw ; CHECK: korw ret i16 %ret } @@ -55,4 +55,3 @@ define i8 @shuf_test1(i16 %v) nounwind { %mask1 = bitcast <8 x i1> %mask to i8 ret i8 %mask1 } - diff --git a/test/CodeGen/X86/break-anti-dependencies.ll b/test/CodeGen/X86/break-anti-dependencies.ll index c942614..614d0ad 100644 --- a/test/CodeGen/X86/break-anti-dependencies.ll +++ b/test/CodeGen/X86/break-anti-dependencies.ll @@ -1,7 +1,7 @@ ; Without list-burr scheduling we may not see the difference in codegen here. ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency ; breaker requires liveness information to be kept. 
-; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t +; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t ; RUN: grep "%xmm0" %t | count 14 ; RUN: not grep "%xmm1" %t ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll index e28923b..f12a354 100644 --- a/test/CodeGen/X86/bt.ll +++ b/test/CodeGen/X86/bt.ll @@ -38,7 +38,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: test2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -56,7 +56,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2(i32 %x, i32 %n) nounwind { entry: ; CHECK: atest2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -74,7 +74,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: atest2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1] @@ -91,7 +91,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test3(i32 %x, i32 %n) nounwind { entry: ; CHECK: test3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -109,7 +109,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: test3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -127,7 +127,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -145,7 +145,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -163,7 +163,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: atestne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -181,7 +181,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: atestne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -199,7 +199,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -217,7 +217,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, 
%e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -235,7 +235,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query2(i32 %x, i32 %n) nounwind { entry: ; CHECK: query2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -253,7 +253,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: query2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -271,7 +271,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2(i32 %x, i32 %n) nounwind { entry: ; CHECK: aquery2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -289,7 +289,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: aquery2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -307,7 +307,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -325,7 +325,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -343,7 +343,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3x(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3x -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -361,7 +361,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3bx(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3bx -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -379,7 +379,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -397,7 +397,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -415,7 +415,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: aqueryne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1] @@ -433,7 +433,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: aqueryne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; <i32> [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -451,7 +451,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3(i32 %x, i32 %n) nounwind { entry: ; 
CHECK: queryne3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -469,7 +469,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -487,7 +487,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3x(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3x -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1] @@ -505,7 +505,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3bx(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3bx -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; <i32> [#uses=1] %tmp3 = and i32 %x, %tmp29 diff --git a/test/CodeGen/X86/byval7.ll b/test/CodeGen/X86/byval7.ll index 8a96e41..42751d7 100644 --- a/test/CodeGen/X86/byval7.ll +++ b/test/CodeGen/X86/byval7.ll @@ -7,14 +7,14 @@ define i32 @main() nounwind { entry: ; CHECK-LABEL: main: -; CHECK: movl $1, (%esp) ; CHECK: leal 16(%esp), %edi ; CHECK: leal 160(%esp), %esi ; CHECK: rep;movsl +; CHECK: movl $1, (%esp) %s = alloca %struct.S ; <%struct.S*> [#uses=2] %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1] store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16 - call void @t( i32 1, %struct.S* byval %s ) nounwind + call void @t( i32 1, %struct.S* byval %s ) nounwind ret i32 0 } diff --git a/test/CodeGen/X86/chain_order.ll b/test/CodeGen/X86/chain_order.ll index 8c1c864..c88726e 100644 --- a/test/CodeGen/X86/chain_order.ll +++ b/test/CodeGen/X86/chain_order.ll @@ -3,8 +3,8 @@ ;CHECK-LABEL: cftx020: ;CHECK: vmovsd (%rdi), %xmm{{.*}} ;CHECK: vmovsd 16(%rdi), %xmm{{.*}} -;CHECK: vmovhpd 8(%rdi), %xmm{{.*}} ;CHECK: vmovsd 24(%rdi), %xmm{{.*}} +;CHECK: vmovhpd 8(%rdi), %xmm{{.*}} ;CHECK: vmovupd %xmm{{.*}}, (%rdi) ;CHECK: vmovupd %xmm{{.*}}, 16(%rdi) ;CHECK: ret @@ -35,4 +35,3 @@ entry: store <2 x double> %14, <2 x double>* %15, align 8 ret void } - diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll index 92c0445..215b862 100644 --- a/test/CodeGen/X86/cmov.ll +++ b/test/CodeGen/X86/cmov.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK-LABEL: test1: -; CHECK: movl $12, %eax -; CHECK-NEXT: btl +; CHECK: btl +; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: cmovael (%rcx), %eax ; CHECK-NEXT: ret @@ -19,8 +19,8 @@ entry: define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK-LABEL: test2: -; CHECK: movl $12, %eax -; CHECK-NEXT: btl +; CHECK: btl +; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: cmovbl (%rcx), %eax ; CHECK-NEXT: ret @@ -92,7 +92,7 @@ bb.i.i.i: ; preds = %entry ; CHECK: testb ; CHECK-NOT: xor ; CHECK: setne -; CHECK-NEXT: testb +; CHECK: testb func_4.exit.i: ; preds = %bb.i.i.i, %entry %.not.i = xor i1 %2, true ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/commute-two-addr.ll b/test/CodeGen/X86/commute-two-addr.ll index eb44e08..656c385 100644 --- a/test/CodeGen/X86/commute-two-addr.ll +++ b/test/CodeGen/X86/commute-two-addr.ll @@ -38,10 +38,10 @@ define i32 @t2(i32 %X, i32 %Y) nounwind { define %0 @t3(i32 %lb, i8 zeroext %has_lb, 
i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind { entry: ; DARWIN-LABEL: t3: -; DARWIN: shll $16 ; DARWIN: shlq $32, %rcx +; DARWIN-NEXT: orq %rcx, %rax +; DARWIN-NEXT: shll $8 ; DARWIN-NOT: leaq -; DARWIN: orq %rcx, %rax %tmp21 = zext i32 %lb to i64 %tmp23 = zext i32 %ub to i64 %tmp24 = shl i64 %tmp23, 32 diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll index 7fcef03..cd2dc1d 100644 --- a/test/CodeGen/X86/fast-isel-mem.ll +++ b/test/CodeGen/X86/fast-isel-mem.ll @@ -40,7 +40,7 @@ entry: ; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx ; ATOM: _t: -; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx -; ATOM: movl $0, %eax +; ATOM: movl L_LotsStuff$non_lazy_ptr, %e{{..}} +; ATOM: movl $0, %e{{..}} } diff --git a/test/CodeGen/X86/fastcc.ll b/test/CodeGen/X86/fastcc.ll index 705ab7b..a362f8d 100644 --- a/test/CodeGen/X86/fastcc.ll +++ b/test/CodeGen/X86/fastcc.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s -; CHECK: movsd %xmm0, 8(%esp) -; CHECK: xorl %ecx, %ecx +; CHECK: movsd %xmm{{[0-9]}}, 8(%esp) +; CHECK: xorl %eax, %eax @d = external global double ; <double*> [#uses=1] @c = external global double ; <double*> [#uses=1] diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index a1fc7db..dde0a2d 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -38,10 +38,10 @@ L: store i16 %A, i16* %Q ret i32 %D - + ; CHECK-LABEL: test2: ; CHECK: movl 4(%esp), %eax -; CHECK-NEXT: movzwl (%eax), %ecx +; CHECK-NEXT: movzwl (%eax), %e{{..}} } diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll index 0a3afb7..60a6844 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -54,22 +54,27 @@ forbody: ; preds = %forcond %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2] %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1] + %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] + + call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind + + %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13 ; <<4 x i32>> [#uses=1] %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %not.i16 ; <<4 x i32>> [#uses=1] %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1] %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] - %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] - %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> 
[#uses=1] - %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] - call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] - %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1] %bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 0729dda..cbcc62a 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -4,7 +4,7 @@ define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { ; ATOM: foo ; ATOM: addl -; ATOM: leal +; ATOM: addl ; ATOM: leal ; CHECK: foo diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index 72a5096..5f48b1e 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -1,21 +1,35 @@ -; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN +; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN ; rdar://7398554 ; When doing vector gather-scatter index calculation with 32-bit indices, ; bounce the vector off of cache rather than shuffling each individual ; element out of the index vector. 
-; CHECK: andps ([[H:%rdx|%r8]]), %xmm0 -; CHECK: movaps %xmm0, {{(-24)?}}(%rsp) -; CHECK: movslq {{(-24)?}}(%rsp), %rax -; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0 -; CHECK: movslq {{-20|4}}(%rsp), %rax -; CHECK: movhpd ([[P]],%rax,8), %xmm0 -; CHECK: movslq {{-16|8}}(%rsp), %rax -; CHECK: movsd ([[P]],%rax,8), %xmm1 -; CHECK: movslq {{-12|12}}(%rsp), %rax -; CHECK: movhpd ([[P]],%rax,8), %xmm1 +; CHECK: foo: +; LIN: movaps (%rsi), %xmm0 +; LIN: andps (%rdx), %xmm0 +; LIN: movaps %xmm0, -24(%rsp) +; LIN: movslq -24(%rsp), %[[REG1:r.+]] +; LIN: movslq -20(%rsp), %[[REG2:r.+]] +; LIN: movslq -16(%rsp), %[[REG3:r.+]] +; LIN: movslq -12(%rsp), %[[REG4:r.+]] +; LIN: movsd (%rdi,%[[REG1]],8), %xmm0 +; LIN: movhpd (%rdi,%[[REG2]],8), %xmm0 +; LIN: movsd (%rdi,%[[REG3]],8), %xmm1 +; LIN: movhpd (%rdi,%[[REG4]],8), %xmm1 + +; WIN: movaps (%rdx), %xmm0 +; WIN: andps (%r8), %xmm0 +; WIN: movaps %xmm0, (%rsp) +; WIN: movslq (%rsp), %[[REG1:r.+]] +; WIN: movslq 4(%rsp), %[[REG2:r.+]] +; WIN: movslq 8(%rsp), %[[REG3:r.+]] +; WIN: movslq 12(%rsp), %[[REG4:r.+]] +; WIN: movsd (%rcx,%[[REG1]],8), %xmm0 +; WIN: movhpd (%rcx,%[[REG2]],8), %xmm0 +; WIN: movsd (%rcx,%[[REG3]],8), %xmm1 +; WIN: movhpd (%rcx,%[[REG4]],8), %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>* %i diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll index 0e65cfd..4dba2c0 100644 --- a/test/CodeGen/X86/ghc-cc.ll +++ b/test/CodeGen/X86/ghc-cc.ll @@ -28,10 +28,10 @@ entry: define cc 10 void @foo() nounwind { entry: - ; CHECK: movl base, %ebx - ; CHECK-NEXT: movl sp, %ebp + ; CHECK: movl r1, %esi ; CHECK-NEXT: movl hp, %edi - ; CHECK-NEXT: movl r1, %esi + ; CHECK-NEXT: movl sp, %ebp + ; CHECK-NEXT: movl base, %ebx %0 = load i32* @r1 %1 = load i32* @hp %2 = load i32* @sp @@ -42,4 +42,3 @@ entry: } declare cc 10 void @bar(i32, i32, i32, i32) - diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll index fcf7e17..403391e 100644 --- a/test/CodeGen/X86/ghc-cc64.ll +++ b/test/CodeGen/X86/ghc-cc64.ll @@ -41,22 +41,22 @@ entry: define cc 10 void @foo() nounwind { entry: - ; CHECK: movq base(%rip), %r13 - ; CHECK-NEXT: movq sp(%rip), %rbp - ; CHECK-NEXT: movq hp(%rip), %r12 - ; CHECK-NEXT: movq r1(%rip), %rbx - ; CHECK-NEXT: movq r2(%rip), %r14 - ; CHECK-NEXT: movq r3(%rip), %rsi - ; CHECK-NEXT: movq r4(%rip), %rdi - ; CHECK-NEXT: movq r5(%rip), %r8 - ; CHECK-NEXT: movq r6(%rip), %r9 - ; CHECK-NEXT: movq splim(%rip), %r15 - ; CHECK-NEXT: movss f1(%rip), %xmm1 - ; CHECK-NEXT: movss f2(%rip), %xmm2 - ; CHECK-NEXT: movss f3(%rip), %xmm3 - ; CHECK-NEXT: movss f4(%rip), %xmm4 + ; CHECK: movsd d2(%rip), %xmm6 ; CHECK-NEXT: movsd d1(%rip), %xmm5 - ; CHECK-NEXT: movsd d2(%rip), %xmm6 + ; CHECK-NEXT: movss f4(%rip), %xmm4 + ; CHECK-NEXT: movss f3(%rip), %xmm3 + ; CHECK-NEXT: movss f2(%rip), %xmm2 + ; CHECK-NEXT: movss f1(%rip), %xmm1 + ; CHECK-NEXT: movq splim(%rip), %r15 + ; CHECK-NEXT: movq r6(%rip), %r9 + ; CHECK-NEXT: movq r5(%rip), %r8 + ; CHECK-NEXT: movq r4(%rip), %rdi + ; CHECK-NEXT: movq r3(%rip), %rsi + ; CHECK-NEXT: movq r2(%rip), %r14 + ; CHECK-NEXT: movq r1(%rip), %rbx + ; CHECK-NEXT: movq hp(%rip), %r12 + ; CHECK-NEXT: movq sp(%rip), %rbp + ; CHECK-NEXT: movq base(%rip), %r13 %0 = load double* @d2 %1 = load double* @d1 %2 = load float* @f4 @@ -83,4 +83,3 @@ entry: declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, float, float, float, float, double, double) - diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll 
index 76d17a0..b34417e 100644 --- a/test/CodeGen/X86/hipe-cc.ll +++ b/test/CodeGen/X86/hipe-cc.ll @@ -49,10 +49,10 @@ entry: store i32 %arg1, i32* %arg1_var store i32 %arg2, i32* %arg2_var - ; CHECK: movl 4(%esp), %edx - ; CHECK-NEXT: movl 8(%esp), %eax + ; CHECK: movl 16(%esp), %esi ; CHECK-NEXT: movl 12(%esp), %ebp - ; CHECK-NEXT: movl 16(%esp), %esi + ; CHECK-NEXT: movl 8(%esp), %eax + ; CHECK-NEXT: movl 4(%esp), %edx %0 = load i32* %hp_var %1 = load i32* %p_var %2 = load i32* %arg0_var diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll index 5dbb5a2..27e1c72 100644 --- a/test/CodeGen/X86/hipe-cc64.ll +++ b/test/CodeGen/X86/hipe-cc64.ll @@ -5,10 +5,10 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: ; CHECK: movq %rsi, %rax - ; CHECK-NEXT: movq %rdi, %rsi - ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: movl $8, %ecx ; CHECK-NEXT: movl $9, %r8d + ; CHECK-NEXT: movq %rdi, %rsi + ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 @@ -57,11 +57,11 @@ entry: store i64 %arg2, i64* %arg2_var store i64 %arg3, i64* %arg3_var - ; CHECK: movq 8(%rsp), %rcx - ; CHECK-NEXT: movq 16(%rsp), %rdx - ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK: movq 40(%rsp), %r15 ; CHECK-NEXT: movq 32(%rsp), %rbp - ; CHECK-NEXT: movq 40(%rsp), %r15 + ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK-NEXT: movq 16(%rsp), %rdx + ; CHECK-NEXT: movq 8(%rsp), %rcx %0 = load i64* %hp_var %1 = load i64* %p_var %2 = load i64* %arg0_var diff --git a/test/CodeGen/X86/lea-recursion.ll b/test/CodeGen/X86/lea-recursion.ll index 3f32fd2..9480600 100644 --- a/test/CodeGen/X86/lea-recursion.ll +++ b/test/CodeGen/X86/lea-recursion.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep lea | count 12 +; RUN: llc < %s -march=x86-64 | grep lea | count 13 ; This testcase was written to demonstrate an instruction-selection problem, ; however it also happens to expose a limitation in the DAGCombiner's @@ -44,4 +44,3 @@ entry: store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7) ret void } - diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll index affd6bf..93cfe46 100644 --- a/test/CodeGen/X86/lea.ll +++ b/test/CodeGen/X86/lea.ll @@ -28,8 +28,7 @@ bb.nph: bb2: ret i32 %x_offs ; CHECK-LABEL: test2: -; CHECK: movl %e[[A0]], %eax -; CHECK: addl $-5, %eax +; CHECK: leal -5(%r[[A0:..]]), %eax ; CHECK: andl $-4, %eax ; CHECK: negl %eax ; CHECK: leal -4(%r[[A0]],%rax), %eax diff --git a/test/CodeGen/X86/load-slice.ll b/test/CodeGen/X86/load-slice.ll index b1f778c..85fd7f0 100644 --- a/test/CodeGen/X86/load-slice.ll +++ b/test/CodeGen/X86/load-slice.ll @@ -17,14 +17,14 @@ ; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned. ; ; STRESS-LABEL: t1: -; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. -; STRESS: vmovss 68([[BASE:[^)]+]]), [[OUT_Imm:%xmm[0-9]+]] -; Add high slice: out[out_start].imm, this is base + 4. -; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. -; STRESS-NEXT: vmovss 64([[BASE]]), [[OUT_Real:%xmm[0-9]+]] +; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]] ; Add low slice: out[out_start].real, this is base + 0. ; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] +; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. 
+; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]] +; Add high slice: out[out_start].imm, this is base + 4. +; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Swap Imm and Real. ; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]] ; Put the results back into out[out_start]. @@ -32,14 +32,14 @@ ; ; Same for REGULAR, we eliminate register bank copy with each slices. ; REGULAR-LABEL: t1: -; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. -; REGULAR: vmovss 68([[BASE:[^)]+]]), [[OUT_Imm:%xmm[0-9]+]] -; Add high slice: out[out_start].imm, this is base + 4. -; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. -; REGULAR-NEXT: vmovss 64([[BASE]]), [[OUT_Real:%xmm[0-9]+]] +; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]] ; Add low slice: out[out_start].real, this is base + 0. ; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] +; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. +; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]] +; Add high slice: out[out_start].imm, this is base + 4. +; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Swap Imm and Real. ; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]] ; Put the results back into out[out_start]. @@ -137,4 +137,3 @@ define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) { %res = add i32 %slice32_lowhigh, %tmpres ret i32 %res } - diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll index c7a3186..e7d74a9 100644 --- a/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -2,12 +2,12 @@ ; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s ; CHECK-LABEL: t: -; CHECK: decq -; CHECK-NEXT: movl (%r9,%rax,4), %eax +; CHECK: movl (%r9,%rax,4), %e{{..}} +; CHECK-NEXT: decq ; CHECK-NEXT: jne ; ATOM-LABEL: t: -; ATOM: movl (%r9,%r{{.+}},4), %eax +; ATOM: movl (%r9,%r{{.+}},4), %e{{..}} ; ATOM-NEXT: decq ; ATOM-NEXT: jne diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index c33cac2..4a4d178 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -1,15 +1,13 @@ -; RUN: llc < %s -mcpu=generic -march=x86-64 > %t -; RUN: not grep and %t -; RUN: not grep movz %t -; RUN: not grep sar %t -; RUN: not grep shl %t -; RUN: grep add %t | count 5 -; RUN: grep inc %t | count 2 -; RUN: grep lea %t | count 3 +; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. 
+; CHECK-LABEL: count_up +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: inc +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_up(double* %d, i64 %n) nounwind { entry: br label %loop @@ -38,6 +36,11 @@ return: ret void } +; CHECK-LABEL: count_down +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_down(double* %d, i64 %n) nounwind { entry: br label %loop @@ -66,6 +69,11 @@ return: ret void } +; CHECK-LABEL: count_up_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: inc +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_up_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -96,6 +104,11 @@ return: ret void } +; CHECK-LABEL: count_down_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_down_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -126,6 +139,11 @@ return: ret void } +; CHECK-LABEL: another_count_up +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_up(double* %d, i64 %n) nounwind { entry: br label %loop @@ -154,6 +172,11 @@ return: ret void } +; CHECK-LABEL: another_count_down +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: decq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_down(double* %d, i64 %n) nounwind { entry: br label %loop @@ -182,6 +205,11 @@ return: ret void } +; CHECK-LABEL: another_count_up_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_up_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -212,6 +240,11 @@ return: ret void } +; CHECK-LABEL: another_count_down_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: decq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_down_signed(double* %d, i64 %n) nounwind { entry: br label %loop diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index c17cc7f..6ae7807 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -56,15 +56,15 @@ entry: define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2-Darwin-LABEL: t2: -; SSE2-Darwin: movaps (%eax), %xmm0 +; SSE2-Darwin: movaps (%ecx), %xmm0 ; SSE2-Darwin: movaps %xmm0, (%eax) ; SSE2-Mingw32-LABEL: t2: -; SSE2-Mingw32: movaps (%eax), %xmm0 +; SSE2-Mingw32: movaps (%ecx), %xmm0 ; SSE2-Mingw32: movaps %xmm0, (%eax) ; SSE1-LABEL: t2: -; SSE1: movaps (%eax), %xmm0 +; SSE1: movaps (%ecx), %xmm0 ; SSE1: movaps %xmm0, (%eax) ; NOSSE-LABEL: t2: @@ -91,14 +91,14 @@ entry: define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2-Darwin-LABEL: t3: -; SSE2-Darwin: movsd (%eax), %xmm0 -; SSE2-Darwin: movsd 8(%eax), %xmm1 +; SSE2-Darwin: movsd (%ecx), %xmm0 +; SSE2-Darwin: movsd 8(%ecx), %xmm1 ; SSE2-Darwin: movsd %xmm1, 8(%eax) ; SSE2-Darwin: movsd %xmm0, (%eax) ; SSE2-Mingw32-LABEL: t3: -; SSE2-Mingw32: movsd (%eax), %xmm0 -; SSE2-Mingw32: movsd 8(%eax), %xmm1 +; SSE2-Mingw32: movsd (%ecx), %xmm0 +; SSE2-Mingw32: movsd 8(%ecx), %xmm1 ; SSE2-Mingw32: movsd %xmm1, 8(%eax) ; SSE2-Mingw32: movsd %xmm0, (%eax) diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll index 169fa33..7bf8a61 100644 --- a/test/CodeGen/X86/pmul.ll +++ b/test/CodeGen/X86/pmul.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t ; 
RUN: grep pmul %t | count 12 -; RUN: grep mov %t | count 11 +; RUN: grep mov %t | count 14 define <4 x i32> @a(<4 x i32> %i) nounwind { %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > diff --git a/test/CodeGen/X86/pr14088.ll b/test/CodeGen/X86/pr14088.ll index 505e3b5..16f20d0 100644 --- a/test/CodeGen/X86/pr14088.ll +++ b/test/CodeGen/X86/pr14088.ll @@ -19,7 +19,14 @@ return: ret i32 %retval.0 } -; We were miscompiling this and using %ax instead of %cx in the movw. -; CHECK: movswl %cx, %ecx -; CHECK: movw %cx, (%rsi) -; CHECK: movslq %ecx, %rcx +; We were miscompiling this and using %ax instead of %cx in the movw +; in the following sequence: +; movswl %cx, %ecx +; movw %cx, (%rsi) +; movslq %ecx, %rcx +; +; We can't produce the above sequence without special SD-level +; heuristics. Now we produce this: +; CHECK: movw %ax, (%rsi) +; CHECK: cwtl +; CHECK: cltq diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll index 9b0ef83..c348fec 100644 --- a/test/CodeGen/X86/pr1505b.ll +++ b/test/CodeGen/X86/pr1505b.ll @@ -57,11 +57,10 @@ entry: %tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] %tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0] ; reload: -; CHECK: fld -; CHECK: fstps ; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc %tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] %tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1] +; CHECK: fld ; CHECK: fstpl ; CHECK: ZNSolsEd %tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1] diff --git a/test/CodeGen/X86/pr16031.ll b/test/CodeGen/X86/pr16031.ll index ab0b5ef..ecf6218 100644 --- a/test/CodeGen/X86/pr16031.ll +++ b/test/CodeGen/X86/pr16031.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s ; CHECK-LABEL: main: ; CHECK: pushl %esi diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll index b792ffa..70135d4 100644 --- a/test/CodeGen/X86/pre-ra-sched.ll +++ b/test/CodeGen/X86/pre-ra-sched.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \ -; RUN: 2>&1 | FileCheck %s +; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \ +; RUN-disabled: 2>&1 | FileCheck %s +; RUN: true ; REQUIRES: asserts ; ; rdar:13279013: pre-RA-sched should not check all interferences and diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll index 05fca0e..48182d0 100644 --- a/test/CodeGen/X86/rdrand.ll +++ b/test/CodeGen/X86/rdrand.ll @@ -11,10 
+11,10 @@ define i32 @_rdrand16_step(i16* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand16_step: ; CHECK: rdrandw %ax -; CHECK: movw %ax, (%r[[A0:di|cx]]) ; CHECK: movzwl %ax, %ecx ; CHECK: movl $1, %eax ; CHECK: cmovael %ecx, %eax +; CHECK: movw %cx, (%r[[A0:di|cx]]) ; CHECK: ret } @@ -26,9 +26,9 @@ define i32 @_rdrand32_step(i32* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand32_step: ; CHECK: rdrandl %e[[T0:[a-z]+]] -; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T0]], %eax +; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: ret } @@ -40,9 +40,9 @@ define i32 @_rdrand64_step(i64* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand64_step: ; CHECK: rdrandq %r[[T1:[a-z]+]] -; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax +; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: ret } diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll index edc5069..c219b4a 100644 --- a/test/CodeGen/X86/rdseed.ll +++ b/test/CodeGen/X86/rdseed.ll @@ -12,10 +12,10 @@ define i32 @_rdseed16_step(i16* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed16_step: ; CHECK: rdseedw %ax -; CHECK: movw %ax, (%r[[A0:di|cx]]) ; CHECK: movzwl %ax, %ecx ; CHECK: movl $1, %eax ; CHECK: cmovael %ecx, %eax +; CHECK: movw %cx, (%r[[A0:di|cx]]) ; CHECK: ret } @@ -27,9 +27,9 @@ define i32 @_rdseed32_step(i32* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed32_step: ; CHECK: rdseedl %e[[T0:[a-z]+]] -; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T0]], %eax +; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: ret } @@ -41,8 +41,8 @@ define i32 @_rdseed64_step(i64* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed64_step: ; CHECK: rdseedq %r[[T1:[a-z]+]] -; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax +; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: ret } diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll index c2aa617..e170762 100644 --- a/test/CodeGen/X86/segmented-stacks-dynamic.ll +++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll @@ -31,7 +31,7 @@ false: ; X32-NEXT: ret ; X32: movl %esp, %eax -; X32-NEXT: subl %ecx, %eax +; X32: subl %ecx, %eax ; X32-NEXT: cmpl %eax, %gs:48 ; X32: movl %eax, %esp @@ -52,7 +52,7 @@ false: ; X64-NEXT: ret ; X64: movq %rsp, %[[RDI:rdi|rax]] -; X64-NEXT: subq %{{.*}}, %[[RDI]] +; X64: subq %{{.*}}, %[[RDI]] ; X64-NEXT: cmpq %[[RDI]], %fs:112 ; X64: movq %[[RDI]], %rsp diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index 5fe2b70..cdd258d 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -34,12 +34,12 @@ bb90: ; preds = %bb84, %bb72 bb91: ; preds = %bb84 ret i32 0 ; CHECK-LABEL: test2: -; CHECK: movnew -; CHECK: movswl +; CHECK: cmovnew +; CHECK: cwtl ; ATOM-LABEL: test2: -; ATOM: movnew -; ATOM: movswl +; ATOM: cmovnew +; ATOM: cwtl } declare i1 @return_false() @@ -256,8 +256,8 @@ entry: %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone ret i8* %call ; CHECK-LABEL: test12: -; CHECK: movq $-1, %[[R:r..]] ; CHECK: mulq +; CHECK: movq $-1, %[[R:r..]] ; CHECK: cmovnoq %rax, %[[R]] ; CHECK: jmp __Znam diff --git a/test/CodeGen/X86/shift-bmi2.ll b/test/CodeGen/X86/shift-bmi2.ll index 0116789..7615754 100644 --- a/test/CodeGen/X86/shift-bmi2.ll +++ b/test/CodeGen/X86/shift-bmi2.ll @@ -30,10 +30,11 @@ entry: %x = load i32* %p %shl = shl i32 %x, %shamt ; BMI2: shl32p -; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order 
scheduling prevents folding, rdar:14208996. +; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: shl32p -; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -74,7 +75,7 @@ entry: %x = load i64* %p %shl = shl i64 %x, %shamt ; BMI264: shl64p -; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } @@ -106,10 +107,11 @@ entry: %x = load i32* %p %shl = lshr i32 %x, %shamt ; BMI2: lshr32p -; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order scheduling prevents folding, rdar:14208996. +; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: lshr32p -; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -128,7 +130,7 @@ entry: %x = load i64* %p %shl = lshr i64 %x, %shamt ; BMI264: lshr64p -; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } @@ -150,10 +152,11 @@ entry: %x = load i32* %p %shl = ashr i32 %x, %shamt ; BMI2: ashr32p -; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order scheduling prevents folding, rdar:14208996. +; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: ashr32p -; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -172,7 +175,7 @@ entry: %x = load i64* %p %shl = ashr i64 %x, %shamt ; BMI264: ashr64p -; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index 0741635..64f5311 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -26,11 +26,10 @@ define double @foo(double %x, double %y, i1 %c) nounwind { ; CHECK-LABEL: split: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne -; CHECK-NEXT: movaps -; CHECK-NEXT: ret +; CHECK-NEXT: je ; CHECK: divsd -; CHECK-NEXT: ret +; CHECK: movaps +; CHECK: ret define double @split(double %x, double %y, i1 %c) nounwind { %a = fdiv double %x, 3.2 %z = select i1 %c, double %a, double %y @@ -65,7 +64,7 @@ return: ; Sink instructions with dead EFLAGS defs. ; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag. -; +; ; See <rdar://problem/8030636>. This test isn't valid after we made machine ; sinking more conservative about sinking instructions that define a preg into a ; block when we don't know if the preg is killed within the current block. 
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 217139a..9147c22 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -7,7 +7,7 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
   %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
   store <2 x double> %tmp9, <2 x double>* %r, align 16
   ret void
- 
+
 ; CHECK-LABEL: test1:
 ; CHECK: movl 8(%esp), %eax
 ; CHECK-NEXT: movapd (%eax), %xmm0
@@ -23,12 +23,12 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
   %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
   store <2 x double> %tmp9, <2 x double>* %r, align 16
   ret void
- 
+
 ; CHECK-LABEL: test2:
-; CHECK: movl 8(%esp), %eax
-; CHECK-NEXT: movapd (%eax), %xmm0
+; CHECK: movl 4(%esp), %eax
+; CHECK: movl 8(%esp), %ecx
+; CHECK-NEXT: movapd (%ecx), %xmm0
 ; CHECK-NEXT: movhpd 12(%esp), %xmm0
-; CHECK-NEXT: movl 4(%esp), %eax
 ; CHECK-NEXT: movapd %xmm0, (%eax)
 ; CHECK-NEXT: ret
 }
@@ -48,7 +48,7 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind
   store <4 x float> %tmp13, <4 x float>* %res
   ret void
 ; CHECK: @test3
-; CHECK: unpcklps 
+; CHECK: unpcklps
 }
 
 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
@@ -85,9 +85,9 @@ define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
   store <4 x float> %tmp2, <4 x float>* %res
   ret void
- 
+
 ; CHECK-LABEL: test6:
-; CHECK: movaps (%eax), %xmm0
+; CHECK: movaps (%ecx), %xmm0
 ; CHECK: movaps %xmm0, (%eax)
 }
 
@@ -96,7 +96,7 @@ define void @test7() nounwind {
   shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
   store <4 x float> %2, <4 x float>* null
   ret void
- 
+
 ; CHECK-LABEL: test7:
 ; CHECK: xorps %xmm0, %xmm0
 ; CHECK: movaps %xmm0, 0
@@ -166,7 +166,7 @@ define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x fl
   store <4 x float> %tmp11, <4 x float>* %res
   ret void
 ; CHECK: test13
-; CHECK: shufps $69, (%eax), %xmm0
+; CHECK: shufps $69, (%ecx), %xmm0
 ; CHECK: pshufd $-40, %xmm0, %xmm0
 }
 
@@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
   %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
   ret <4 x float> %tmp27
 ; CHECK-LABEL: test14:
-; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
-; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]]
+; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
+; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
 ; CHECK: movlhps [[X2]], [[X0]]
 }
 
@@ -221,4 +221,3 @@ entry:
   %double2float.i = fptrunc <4 x double> %0 to <4 x float>
   ret <4 x float> %double2float.i
 }
-
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index fab266f..7557f25 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -12,7 +12,7 @@ entry:
   %D = or i32 %C, %B
   store i32 %D, i32* %a0, align 4
   ret void
- 
+
 ; X64-LABEL: test1:
 ; X64: movb %sil, (%rdi)
 
@@ -34,8 +34,8 @@ entry:
 ; X64: movb %sil, 1(%rdi)
 
 ; X32-LABEL: test2:
-; X32: movb 8(%esp), %al
-; X32: movb %al, 1(%{{.*}})
+; X32: movb 8(%esp), %[[REG:[abcd]l]]
+; X32: movb %[[REG]], 1(%{{.*}})
 }
 
 define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -67,8 +67,8 @@ entry:
 ; X64: movw %si, 2(%rdi)
 
 ; X32-LABEL: test4:
-; X32: movl 8(%esp), %eax
-; X32: movw %ax, 2(%{{.*}})
+; X32: movl 8(%esp), %e[[REG:[abcd]x]]
+; X32: movw %[[REG]], 2(%{{.*}})
 }
 
 define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -84,8 +84,8 @@ entry:
 ; X64: movw %si, 2(%rdi)
 
 ; X32-LABEL: test5:
-; X32: movzwl 8(%esp), %eax
-; X32: movw %ax, 2(%{{.*}})
+; X32: movzwl 8(%esp), %e[[REG:[abcd]x]]
+; X32: movw %[[REG]], 2(%{{.*}})
 }
 
 define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
@@ -102,8 +102,8 @@ entry:
 
 
 ; X32-LABEL: test6:
-; X32: movb 8(%esp), %al
-; X32: movb %al, 5(%{{.*}})
+; X32: movb 8(%esp), %[[REG:[abcd]l]]
+; X32: movb %[[REG]], 5(%{{.*}})
 }
 
 define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
@@ -121,8 +121,8 @@ entry:
 
 
 ; X32-LABEL: test7:
-; X32: movb 8(%esp), %cl
-; X32: movb %cl, 5(%{{.*}})
+; X32: movb 8(%esp), %[[REG:[abcd]l]]
+; X32: movb %[[REG]], 5(%{{.*}})
 }
 
 ; PR7833
diff --git a/test/CodeGen/X86/tailcall-largecode.ll b/test/CodeGen/X86/tailcall-largecode.ll
index e9b8721..f5662d9 100644
--- a/test/CodeGen/X86/tailcall-largecode.ll
+++ b/test/CodeGen/X86/tailcall-largecode.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s
 
 declare fastcc i32 @callee(i32 %arg)
 define fastcc i32 @directcall(i32 %arg) {
diff --git a/test/CodeGen/X86/test-nofold.ll b/test/CodeGen/X86/test-nofold.ll
index 97db1b3..19fbaaf 100644
--- a/test/CodeGen/X86/test-nofold.ll
+++ b/test/CodeGen/X86/test-nofold.ll
@@ -2,10 +2,10 @@
 ; rdar://5752025
 
 ; We want:
-; CHECK:      movl $42, %ecx
-; CHECK-NEXT: movl 4(%esp), %eax
-; CHECK-NEXT: andl $15, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK:      movl 4(%esp), %ecx
+; CHECK-NEXT: andl $15, %ecx
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: cmovel %ecx, %eax
 ; CHECK-NEXT: ret
 ;
 ; We don't want:
@@ -39,4 +39,3 @@ entry:
   %retval = select i1 %tmp4, i32 %tmp2, i32 42 ; <i32> [#uses=1]
   ret i32 %retval
 }
-
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 3711cf1..0ed6347 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -22,7 +22,7 @@ ret_false:
   ret i1 false
 }
 ; CHECK-LABEL: test2:
-; CHECK: btl %eax
+; CHECK: btl
 
 define i32 @test3(i8* %ptr) nounwind {
   %val = load i8* %ptr
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
index 8655c6c..fca4da6 100644
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ b/test/CodeGen/X86/v-binop-widen.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
-; CHECK: divss
 ; CHECK: divps
 ; CHECK: divps
+; CHECK: divss
 
 %vec = type <9 x float>
 define %vec @vecdiv( %vec %p1, %vec %p2)
@@ -9,4 +9,3 @@ define %vec @vecdiv( %vec %p1, %vec %p2)
   %result = fdiv %vec %p1, %p2
   ret %vec %result
 }
-
diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll
index 569586a..3342111 100644
--- a/test/CodeGen/X86/v-binop-widen2.ll
+++ b/test/CodeGen/X86/v-binop-widen2.ll
@@ -2,9 +2,9 @@
 ; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
 
 %vec = type <6 x float>
+; CHECK: divps
 ; CHECK: divss
 ; CHECK: divss
-; CHECK: divps
 
 ; Scheduler causes a different instruction order to be produced on Intel Atom
 ; ATOM: divps
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
index 8488c2d..c9b2fb5 100644
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ b/test/CodeGen/X86/vec_shuffle-27.ll
@@ -7,10 +7,10 @@ target triple = "i686-apple-cl.1.0"
 define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
 entry:
 ; CHECK: subps
-; CHECK: mulps
-; CHECK: addps
 ; CHECK: subps
 ; CHECK: mulps
+; CHECK: mulps
+; CHECK: addps
 ; CHECK: addps
   %tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1]
   %sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
index 1560454..8fd9a5c 100644
--- a/test/CodeGen/X86/vec_shuffle-39.ll
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -54,8 +54,8 @@ entry:
 define <2 x double> @t3() nounwind readonly {
 bb:
 ; CHECK-LABEL: t3:
-; CHECK: punpcklqdq %xmm1, %xmm0
 ; CHECK: movq (%rax), %xmm1
+; CHECK: punpcklqdq %xmm2, %xmm0
 ; CHECK: movsd %xmm1, %xmm0
   %tmp0 = load i128* null, align 1
   %tmp1 = load <2 x i32>* undef, align 8
@@ -72,9 +72,9 @@ bb:
 define <2 x i64> @t4() nounwind readonly {
 bb:
 ; CHECK-LABEL: t4:
-; CHECK: punpcklqdq %xmm0, %xmm1
 ; CHECK: movq (%rax), %xmm0
-; CHECK: movsd %xmm1, %xmm0
+; CHECK: punpcklqdq %{{xmm.}}, %[[XMM:xmm[0-9]]]
+; CHECK: movsd %[[XMM]], %xmm0
   %tmp0 = load i128* null, align 1
   %tmp1 = load <2 x i32>* undef, align 8
   %tmp2 = bitcast i128 %tmp0 to <16 x i8>
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index ac4d036..d115929 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -march=x86 -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s
 ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
 
-; CHECK: paddd
 ; CHECK: movl
+; CHECK: paddd
 ; CHECK: movlpd
 
 ; Scheduler causes produce a different instruction order
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index 9752edb..aff5305 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32     | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32       | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
 
@@ -52,18 +52,18 @@ entry:
   %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
 
 ; M64: subq $48, %rsp
-; M64: leaq -4096(%rbp), %r9
 ; M64: movq %rax, 32(%rsp)
+; M64: leaq -4096(%rbp), %r9
 ; M64: callq bar
 
 ; W64: subq $48, %rsp
-; W64: leaq -4096(%rbp), %r9
 ; W64: movq %rax, 32(%rsp)
+; W64: leaq -4096(%rbp), %r9
 ; W64: callq bar
 
 ; EFI: subq $48, %rsp
-; EFI: leaq -[[B0OFS]](%rbp), %r9
 ; EFI: movq [[R64]], 32(%rsp)
+; EFI: leaq -[[B0OFS]](%rbp), %r9
 ; EFI: callq _bar
 
   ret i64 %r
diff --git a/test/CodeGen/X86/x86-64-psub.ll b/test/CodeGen/X86/x86-64-psub.ll
index be09a4f..183ddf4 100644
--- a/test/CodeGen/X86/x86-64-psub.ll
+++ b/test/CodeGen/X86/x86-64-psub.ll
@@ -4,8 +4,8 @@
 ; This test checks that the operands of packed sub instructions are
 ; never interchanged by the "Two-Address instruction pass".
 
-declare { i64, double } @getFirstParam() 
-declare { i64, double } @getSecondParam() 
+declare { i64, double } @getFirstParam()
+declare { i64, double } @getSecondParam()
 
 define i64 @test_psubb() {
 entry:
@@ -28,9 +28,10 @@ entry:
 
 ; CHECK-LABEL: test_psubb:
 ; CHECK: callq getFirstParam
+; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK: callq getSecondParam
+; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
-; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK: psubb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -55,9 +56,10 @@ entry:
 
 ; CHECK-LABEL: test_psubw:
 ; CHECK: callq getFirstParam
+; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK: callq getSecondParam
+; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
-; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK: psubw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -83,9 +85,10 @@ entry:
 
 ; CHECK-LABEL: test_psubd:
 ; CHECK: callq getFirstParam
+; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK: callq getSecondParam
+; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
-; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK: psubd [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -110,9 +113,10 @@ entry:
 
 ; CHECK-LABEL: test_psubsb:
 ; CHECK: callq getFirstParam
+; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK: callq getSecondParam
+; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
-; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK: psubsb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -137,9 +141,10 @@ entry:
 
 ; CHECK-LABEL: test_psubswv:
 ; CHECK: callq getFirstParam
+; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK: callq getSecondParam
+; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
-; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK: psubsw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -164,9 +169,10 @@ entry:
 
 ; CHECK-LABEL: test_psubusbv:
 ; CHECK: callq getFirstParam
+; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK: callq getSecondParam
+; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
-; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK: psubusb [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
@@ -191,9 +197,10 @@ entry:
 
 ; CHECK-LABEL: test_psubuswv:
 ; CHECK: callq getFirstParam
+; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
 ; CHECK: callq getSecondParam
+; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
 ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
-; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
 ; CHECK: psubusw [[PARAM2]], [[PARAM1]]
 ; CHECK: ret
 
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
index af57e5c..2f3adb8 100644
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -6,8 +6,8 @@
 define <4 x i32> @shl4(<4 x i32> %A) nounwind {
 entry:
 ; CHECK: shl4
-; CHECK: padd
 ; CHECK: pslld
+; CHECK: padd
 ; CHECK: ret
   %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
   %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
@@ -67,8 +67,8 @@ entry:
 define <8 x i16> @shl8(<8 x i16> %A) nounwind {
 entry:
 ; CHECK: shl8
-; CHECK: padd
 ; CHECK: psllw
+; CHECK: padd
 ; CHECK: ret
   %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
index ff93c68..a10923f 100644
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s
 
 ;; Simple case
 define i32 @test1(i8 %x) nounwind readnone {
@@ -10,7 +10,7 @@ define i32 @test1(i8 %x) nounwind readnone {
 ; CHECK: movzbl
 ; CHECK-NEXT: andl {{.*}}224
 
-;; Multiple uses of %x but easily extensible. 
+;; Multiple uses of %x but easily extensible.
 define i32 @test2(i8 %x) nounwind readnone {
   %A = and i8 %x, -32
   %B = zext i8 %A to i32
@@ -21,8 +21,8 @@ define i32 @test2(i8 %x) nounwind readnone {
 }
 ; CHECK: test2
 ; CHECK: movzbl
-; CHECK: orl $63
 ; CHECK: andl $224
+; CHECK: orl $63
 
 
 declare void @use(i32, i8)
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index 25dabbe..5b2713d 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -34,10 +34,10 @@ entry:
   %tmp12 = add i64 %tmp11, 5089792279245435153
 
 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK-NEXT: cmpl $-8608074, %e[[REGISTER_zext]]
-; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%r[a-z0-9]+]]
-; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%r[a-z0-9]+]]
+; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
+; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
 ; CHECK-NOT: [[REGISTER_zext]]
+; CHECK-DAG: testl %e[[REGISTER_zext]]
 ; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
 
   %tmp13 = sub i64 %tmp12, 2138875574