aboutsummaryrefslogtreecommitdiffstats
path: root/test/CodeGen/X86
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86')
-rw-r--r--test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll41
-rw-r--r--test/CodeGen/X86/2007-05-15-maskmovq.ll8
-rw-r--r--test/CodeGen/X86/2007-06-15-IntToMMX.ll13
-rw-r--r--test/CodeGen/X86/2007-07-03-GR64ToVR64.ll14
-rw-r--r--test/CodeGen/X86/2008-02-18-TailMergingBug.ll2
-rw-r--r--test/CodeGen/X86/2008-04-08-CoalescerCrash.ll8
-rw-r--r--test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll4
-rw-r--r--test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll6
-rw-r--r--test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll22
-rw-r--r--test/CodeGen/X86/2008-10-27-CoalescerBug.ll10
-rw-r--r--test/CodeGen/X86/2009-02-26-MachineLICMBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-13-PHIElimBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll2
-rw-r--r--test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll8
-rw-r--r--test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll64
-rw-r--r--test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll2
-rw-r--r--test/CodeGen/X86/2010-05-25-DotDebugLoc.ll7
-rw-r--r--test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll28
-rw-r--r--test/CodeGen/X86/2010-09-16-asmcrash.ll56
-rw-r--r--test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll26
-rw-r--r--test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll71
-rw-r--r--test/CodeGen/X86/2010-10-08-cmpxchg8b.ll28
-rw-r--r--test/CodeGen/X86/2010-11-02-DbgParameter.ll35
-rw-r--r--test/CodeGen/X86/2010-11-09-MOVLPS.ll66
-rw-r--r--test/CodeGen/X86/3addr-or.ll38
-rw-r--r--test/CodeGen/X86/abi-isel.ll610
-rw-r--r--test/CodeGen/X86/add-of-carry.ll14
-rw-r--r--test/CodeGen/X86/alldiv-divdi3.ll17
-rw-r--r--test/CodeGen/X86/andimm8.ll19
-rw-r--r--test/CodeGen/X86/atomic_op.ll71
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll122
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86_64.ll2
-rw-r--r--test/CodeGen/X86/bit-test-shift.ll13
-rw-r--r--test/CodeGen/X86/bswap-inline-asm.ll7
-rw-r--r--test/CodeGen/X86/byval.ll11
-rw-r--r--test/CodeGen/X86/cmp-test.ll27
-rw-r--r--test/CodeGen/X86/cmp.ll92
-rw-r--r--test/CodeGen/X86/cmp0.ll24
-rw-r--r--test/CodeGen/X86/cmp2.ll18
-rw-r--r--test/CodeGen/X86/compare-inf.ll16
-rw-r--r--test/CodeGen/X86/complex-asm.ll17
-rw-r--r--test/CodeGen/X86/dll-linkage.ll2
-rw-r--r--test/CodeGen/X86/dllexport.ll2
-rw-r--r--test/CodeGen/X86/dollar-name.ll2
-rw-r--r--test/CodeGen/X86/dyn-stackalloc.ll6
-rw-r--r--test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll23
-rw-r--r--test/CodeGen/X86/fast-isel-bc.ll16
-rw-r--r--test/CodeGen/X86/fast-isel-mem.ll18
-rw-r--r--test/CodeGen/X86/fltused.ll19
-rw-r--r--test/CodeGen/X86/fp-stack-compare.ll3
-rw-r--r--test/CodeGen/X86/ghc-cc.ll4
-rw-r--r--test/CodeGen/X86/global-sections.ll4
-rw-r--r--test/CodeGen/X86/legalizedag_vec.ll8
-rw-r--r--test/CodeGen/X86/loop-strength-reduce4.ll4
-rw-r--r--test/CodeGen/X86/lsr-reuse.ll15
-rw-r--r--test/CodeGen/X86/lsr-wrap.ll2
-rw-r--r--test/CodeGen/X86/memcmp.ll12
-rw-r--r--test/CodeGen/X86/memmove-0.ll9
-rw-r--r--test/CodeGen/X86/memmove-1.ll9
-rw-r--r--test/CodeGen/X86/memmove-2.ll9
-rw-r--r--test/CodeGen/X86/memmove-3.ll9
-rw-r--r--test/CodeGen/X86/memset-2.ll4
-rw-r--r--test/CodeGen/X86/mingw-alloca.ll4
-rw-r--r--test/CodeGen/X86/mmx-arg-passing.ll19
-rw-r--r--test/CodeGen/X86/mmx-arg-passing2.ll14
-rw-r--r--test/CodeGen/X86/mmx-arith.ll380
-rw-r--r--test/CodeGen/X86/mmx-bitcast-to-i64.ll37
-rw-r--r--test/CodeGen/X86/mmx-builtins.ll1324
-rw-r--r--test/CodeGen/X86/mmx-insert-element.ll10
-rw-r--r--test/CodeGen/X86/mmx-pinsrw.ll2
-rw-r--r--test/CodeGen/X86/mmx-punpckhdq.ll19
-rw-r--r--test/CodeGen/X86/mmx-shift.ll24
-rw-r--r--test/CodeGen/X86/mmx-shuffle.ll6
-rw-r--r--test/CodeGen/X86/mmx-vzmovl-2.ll24
-rw-r--r--test/CodeGen/X86/mmx-vzmovl.ll4
-rw-r--r--test/CodeGen/X86/movgs.ll53
-rw-r--r--test/CodeGen/X86/mult-alt-generic-i686.ll321
-rw-r--r--test/CodeGen/X86/mult-alt-generic-x86_64.ll321
-rw-r--r--test/CodeGen/X86/mult-alt-x86.ll358
-rw-r--r--test/CodeGen/X86/narrow-shl-load.ll65
-rw-r--r--test/CodeGen/X86/narrow_op-2.ll25
-rw-r--r--test/CodeGen/X86/phi-immediate-factoring.ll2
-rw-r--r--test/CodeGen/X86/phys_subreg_coalesce-2.ll2
-rw-r--r--test/CodeGen/X86/pic.ll24
-rw-r--r--test/CodeGen/X86/postra-licm.ll2
-rw-r--r--test/CodeGen/X86/pr2659.ll7
-rw-r--r--test/CodeGen/X86/pr3522.ll2
-rw-r--r--test/CodeGen/X86/shift-folding.ll6
-rw-r--r--test/CodeGen/X86/sibcall-3.ll2
-rw-r--r--test/CodeGen/X86/sibcall.ll30
-rw-r--r--test/CodeGen/X86/sink-hoist.ll29
-rw-r--r--test/CodeGen/X86/sse1.ll25
-rw-r--r--test/CodeGen/X86/sse2.ll168
-rw-r--r--test/CodeGen/X86/sse3.ll17
-rw-r--r--test/CodeGen/X86/sse41.ll25
-rw-r--r--test/CodeGen/X86/stdcall-notailcall.ll13
-rw-r--r--test/CodeGen/X86/stdcall.ll2
-rw-r--r--test/CodeGen/X86/store-narrow.ll45
-rw-r--r--test/CodeGen/X86/store_op_load_fold2.ll2
-rw-r--r--test/CodeGen/X86/tail-opts.ll21
-rw-r--r--test/CodeGen/X86/tailcall-stackalign.ll2
-rw-r--r--test/CodeGen/X86/tailcallfp2.ll4
-rw-r--r--test/CodeGen/X86/tls9.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-coalesce.ll2
-rw-r--r--test/CodeGen/X86/uint64-to-float.ll21
-rw-r--r--test/CodeGen/X86/unaligned-load.ll2
-rw-r--r--test/CodeGen/X86/vec_insert-5.ll9
-rw-r--r--test/CodeGen/X86/vec_insert-7.ll15
-rw-r--r--test/CodeGen/X86/vec_insert-9.ll2
-rw-r--r--test/CodeGen/X86/vec_set-F.ll6
-rw-r--r--test/CodeGen/X86/vec_shuffle-10.ll25
-rw-r--r--test/CodeGen/X86/vec_shuffle-3.ll20
-rw-r--r--test/CodeGen/X86/vec_shuffle-37.ll24
-rw-r--r--test/CodeGen/X86/vec_shuffle-4.ll12
-rw-r--r--test/CodeGen/X86/vec_shuffle-5.ll13
-rw-r--r--test/CodeGen/X86/vec_shuffle-6.ll42
-rw-r--r--test/CodeGen/X86/vec_shuffle-7.ll11
-rw-r--r--test/CodeGen/X86/vec_shuffle-8.ll10
-rw-r--r--test/CodeGen/X86/vec_shuffle-9.ll21
-rw-r--r--test/CodeGen/X86/vec_zero_cse.ll5
-rw-r--r--test/CodeGen/X86/widen_select-1.ll2
-rw-r--r--test/CodeGen/X86/win64_params.ll11
-rw-r--r--test/CodeGen/X86/win64_vararg.ll20
-rw-r--r--test/CodeGen/X86/win_chkstk.ll46
125 files changed, 4504 insertions, 1082 deletions
diff --git a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
index c39b82a..a662dd5 100644
--- a/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
+++ b/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -o - -march=x86 -mattr=+mmx | FileCheck %s
+; There are no MMX instructions here. We use add+adcl for the adds.
define <1 x i64> @unsigned_add3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
entry:
@@ -7,9 +8,8 @@ entry:
bb26: ; preds = %bb26, %entry
-; CHECK: movq ({{.*}},8), %mm
-; CHECK: paddq ({{.*}},8), %mm
-; CHECK: paddq %mm{{[0-7]}}, %mm
+; CHECK: addl %e
+; CHECK: adcl %e
%i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3]
%sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1]
@@ -27,3 +27,38 @@ bb31: ; preds = %bb26, %entry
%sum.035.1 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ] ; <<1 x i64>> [#uses=1]
ret <1 x i64> %sum.035.1
}
+
+
+; This is the original test converted to use MMX intrinsics.
+
+define <1 x i64> @unsigned_add3a(x86_mmx* %a, x86_mmx* %b, i32 %count) nounwind {
+entry:
+ %tmp2943 = bitcast <1 x i64><i64 0> to x86_mmx
+ %tmp2942 = icmp eq i32 %count, 0 ; <i1> [#uses=1]
+ br i1 %tmp2942, label %bb31, label %bb26
+
+bb26: ; preds = %bb26, %entry
+
+; CHECK: movq ({{.*}},8), %mm
+; CHECK: paddq ({{.*}},8), %mm
+; CHECK: paddq %mm{{[0-7]}}, %mm
+
+ %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ] ; <i32> [#uses=3]
+ %sum.035.0 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1]
+ %tmp13 = getelementptr x86_mmx* %b, i32 %i.037.0 ; <x86_mmx*> [#uses=1]
+ %tmp14 = load x86_mmx* %tmp13 ; <x86_mmx> [#uses=1]
+ %tmp18 = getelementptr x86_mmx* %a, i32 %i.037.0 ; <x86_mmx*> [#uses=1]
+ %tmp19 = load x86_mmx* %tmp18 ; <x86_mmx> [#uses=1]
+ %tmp21 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp19, x86_mmx %tmp14) ; <x86_mmx> [#uses=1]
+ %tmp22 = call x86_mmx @llvm.x86.mmx.padd.q (x86_mmx %tmp21, x86_mmx %sum.035.0) ; <x86_mmx> [#uses=2]
+ %tmp25 = add i32 %i.037.0, 1 ; <i32> [#uses=2]
+ %tmp29 = icmp ult i32 %tmp25, %count ; <i1> [#uses=1]
+ br i1 %tmp29, label %bb26, label %bb31
+
+bb31: ; preds = %bb26, %entry
+ %sum.035.1 = phi x86_mmx [ %tmp2943, %entry ], [ %tmp22, %bb26 ] ; <x86_mmx> [#uses=1]
+ %t = bitcast x86_mmx %sum.035.1 to <1 x i64>
+ ret <1 x i64> %t
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2007-05-15-maskmovq.ll b/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 2093b8f..006cf2e 100644
--- a/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -5,10 +5,10 @@ target triple = "i686-apple-darwin8"
define void @test(<1 x i64> %c64, <1 x i64> %mask1, i8* %P) {
entry:
- %tmp4 = bitcast <1 x i64> %mask1 to <8 x i8> ; <<8 x i8>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %c64 to <8 x i8> ; <<8 x i8>> [#uses=1]
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> %tmp6, <8 x i8> %tmp4, i8* %P )
+ %tmp4 = bitcast <1 x i64> %mask1 to x86_mmx ; <x86_mmx> [#uses=1]
+ %tmp6 = bitcast <1 x i64> %c64 to x86_mmx ; <x86_mmx> [#uses=1]
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp4, x86_mmx %tmp6, i8* %P )
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/2007-06-15-IntToMMX.ll b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
index 6128d8b..660d4fe 100644
--- a/test/CodeGen/X86/2007-06-15-IntToMMX.ll
+++ b/test/CodeGen/X86/2007-06-15-IntToMMX.ll
@@ -1,17 +1,16 @@
; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep paddusw
-@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
+@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
define void @foo(<1 x i64> %A, <1 x i64> %B) {
entry:
- %tmp4 = bitcast <1 x i64> %B to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %A to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 ) ; <<4 x i16>> [#uses=1]
- %tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64> ; <<1 x i64>> [#uses=1]
- store <1 x i64> %tmp8, <1 x i64>* @R
+ %tmp2 = bitcast <1 x i64> %A to x86_mmx
+ %tmp3 = bitcast <1 x i64> %B to x86_mmx
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp2, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=1]
+ store x86_mmx %tmp7, x86_mmx* @R
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
index 2c513f1..1c5e676 100644
--- a/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
+++ b/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll
@@ -2,19 +2,17 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
-@R = external global <1 x i64> ; <<1 x i64>*> [#uses=1]
+@R = external global x86_mmx ; <x86_mmx*> [#uses=1]
define void @foo(<1 x i64> %A, <1 x i64> %B) nounwind {
entry:
- %tmp4 = bitcast <1 x i64> %B to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp6 = bitcast <1 x i64> %A to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp7 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp6, <4 x i16> %tmp4 ) ; <<4 x i16>> [#uses=1]
- %tmp8 = bitcast <4 x i16> %tmp7 to <1 x i64> ; <<1 x i64>> [#uses=1]
- store <1 x i64> %tmp8, <1 x i64>* @R
+ %tmp4 = bitcast <1 x i64> %B to x86_mmx ; <<4 x i16>> [#uses=1]
+ %tmp6 = bitcast <1 x i64> %A to x86_mmx ; <<4 x i16>> [#uses=1]
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp6, x86_mmx %tmp4 ) ; <x86_mmx> [#uses=1]
+ store x86_mmx %tmp7, x86_mmx* @R
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
-
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 7463a0e..bdacf50 100644
--- a/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 9
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats |& grep {Number of block tails merged} | grep 16
; PR1909
@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
index dc8c097..5089e8c 100644
--- a/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll
@@ -5,15 +5,15 @@ entry:
tail call void asm sideeffect "# top of block", "~{dirflag},~{fpsr},~{flags},~{di},~{si},~{dx},~{cx},~{ax}"( ) nounwind
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 8", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp1 = tail call <2 x i32> asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; <<2 x i32>> [#uses=1]
+ %tmp1 = tail call x86_mmx asm sideeffect "movd $1, $0", "=={mm4},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( i32 undef ) nounwind ; <x86_mmx> [#uses=1]
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 9", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef ) nounwind ; <i32> [#uses=1]
+ %tmp3 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm3},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef ) nounwind ; <i32> [#uses=1]
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 10", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> undef, i32 undef, i32 %tmp3 ) nounwind
+ tail call void asm sideeffect "movntq $0, 0($1,$2)", "{mm0},{di},{bp},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx undef, i32 undef, i32 %tmp3 ) nounwind
tail call void asm sideeffect ".file \224443946.c\22", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
tail call void asm sideeffect ".line 11", "~{dirflag},~{fpsr},~{flags}"( ) nounwind
- %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( <2 x i32> %tmp1 ) nounwind ; <i32> [#uses=0]
+ %tmp8 = tail call i32 asm sideeffect "movd $1, $0", "=={bp},{mm4},~{dirflag},~{fpsr},~{flags},~{memory}"( x86_mmx %tmp1 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 500cd1f..8665282 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd | count 5
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movl | count 2
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd | count 5
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movl | count 2
@atomic = global double 0.000000e+00 ; <double*> [#uses=1]
@atomic2 = global double 0.000000e+00 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
index c76dd7d..53402c0 100644
--- a/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
+++ b/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll
@@ -17,11 +17,13 @@ entry:
br i1 false, label %bb.nph144.split, label %bb133
bb.nph144.split: ; preds = %entry
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> zeroinitializer, i8* null ) nounwind
+ %tmp = bitcast <8 x i8> zeroinitializer to x86_mmx
+ %tmp2 = bitcast <8 x i8> zeroinitializer to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp, x86_mmx %tmp2, i8* null ) nounwind
unreachable
bb133: ; preds = %entry
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*) nounwind
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 60be0d5..2dc1dea 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,6 +1,9 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
-; PR2687
+; originally from PR2687, but things don't work that way any more.
+; there are no MMX instructions here; we use XMM.
define <2 x double> @a(<2 x i32> %x) nounwind {
entry:
@@ -13,3 +16,20 @@ entry:
%y = fptosi <2 x double> %x to <2 x i32>
ret <2 x i32> %y
}
+
+; This is how to get MMX instructions.
+
+define <2 x double> @a2(x86_mmx %x) nounwind {
+entry:
+ %y = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %x)
+ ret <2 x double> %y
+}
+
+define x86_mmx @b2(<2 x double> %x) nounwind {
+entry:
+ %y = tail call x86_mmx @llvm.x86.sse.cvttpd2pi (<2 x double> %x)
+ ret x86_mmx %y
+}
+
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx)
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>)
diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index afeb358..9d144a4 100644
--- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& not grep {Number of register spills}
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats |& FileCheck %s
+; Now this test spills one register. But a reload in the loop is cheaper than
+; the divsd so it's a win.
define fastcc void @fourn(double* %data, i32 %isign) nounwind {
+; CHECK: fourn
entry:
br label %bb
@@ -11,6 +14,11 @@ bb: ; preds = %bb, %entry
%1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1]
br i1 %1, label %bb30.loopexit, label %bb
+; CHECK: %bb30.loopexit
+; CHECK: divsd %xmm0
+; CHECK: movsd %xmm0, 16(%esp)
+; CHECK: .align
+; CHECK-NEXT: %bb3
bb3: ; preds = %bb30.loopexit, %bb25, %bb3
%2 = load i32* null, align 4 ; <i32> [#uses=1]
%3 = mul i32 %2, 0 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 4a97ac3..0b5b7bd 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {8 machine-licm}
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
; rdar://6627786
; rdar://7792037
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
index 8d42627..2853930 100644
--- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s
; Check the register copy comes after the call to f and before the call to g
; PR3784
diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
index da493d4..b13d33e 100644
--- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
+++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -asm-verbose | FileCheck %s
; Check that register copies in the landing pad come after the EH_LABEL
declare i32 @f()
diff --git a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
index 336f17e..01852a6 100644
--- a/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
+++ b/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movl
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | not grep movl
define <8 x i8> @a(i8 zeroext %x) nounwind {
%r = insertelement <8 x i8> undef, i8 %x, i32 0
diff --git a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
index b9b09a3..288eef4 100644
--- a/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
+++ b/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
@@ -1,10 +1,12 @@
; RUN: llc < %s -march=x86-64
; PR4669
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32)
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)
define <1 x i64> @test(i64 %t) {
entry:
%t1 = insertelement <1 x i64> undef, i64 %t, i32 0
- %t2 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %t1, i32 48)
- ret <1 x i64> %t2
+ %t0 = bitcast <1 x i64> %t1 to x86_mmx
+ %t2 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %t0, i32 48)
+ %t3 = bitcast x86_mmx %t2 to <1 x i64>
+ ret <1 x i64> %t3
}
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 4cd3be3..fa3d5fb 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -1,12 +1,12 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
+; There are no MMX operations here, so we use XMM or i64.
define void @ti8(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <8 x i8>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <8 x i8>
-; CHECK: movdq2q
%tmp3 = add <8 x i8> %tmp1, %tmp2
+; CHECK: paddb %xmm1, %xmm0
store <8 x i8> %tmp3, <8 x i8>* null
ret void
}
@@ -14,10 +14,9 @@ entry:
define void @ti16(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <4 x i16>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <4 x i16>
-; CHECK: movdq2q
%tmp3 = add <4 x i16> %tmp1, %tmp2
+; CHECK: paddw %xmm1, %xmm0
store <4 x i16> %tmp3, <4 x i16>* null
ret void
}
@@ -25,10 +24,9 @@ entry:
define void @ti32(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <2 x i32>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <2 x i32>
-; CHECK: movdq2q
%tmp3 = add <2 x i32> %tmp1, %tmp2
+; CHECK: paddd %xmm1, %xmm0
store <2 x i32> %tmp3, <2 x i32>* null
ret void
}
@@ -36,10 +34,60 @@ entry:
define void @ti64(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <1 x i64>
-; CHECK: movdq2q
%tmp2 = bitcast double %b to <1 x i64>
-; CHECK: movdq2q
%tmp3 = add <1 x i64> %tmp1, %tmp2
+; CHECK: addq %rax, %rcx
store <1 x i64> %tmp3, <1 x i64>* null
ret void
}
+
+; MMX intrinsics calls get us MMX instructions.
+
+define void @ti8a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti16a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti32a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+define void @ti64a(double %a, double %b) nounwind {
+entry:
+ %tmp1 = bitcast double %a to x86_mmx
+; CHECK: movdq2q
+ %tmp2 = bitcast double %b to x86_mmx
+; CHECK: movdq2q
+ %tmp3 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %tmp1, x86_mmx %tmp2)
+ store x86_mmx %tmp3, x86_mmx* null
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index e20f1d8..3738f80 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -11,7 +11,7 @@ target triple = "i386-apple-darwin10.0.0"
; Verify that %esi gets spilled before the call.
; CHECK: Z4test1SiS
; CHECK: movl %esi,{{.*}}(%ebp)
-; CHECK: call __Z6throwsv
+; CHECK: calll __Z6throwsv
define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp {
entry:
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index d211549..f9bda7f 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -1,5 +1,8 @@
-; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12
-; Test to check .debug_loc support. This test case emits 13 debug_loc entries.
+; RUN: llc -march=x86-64 -O2 < %s | FileCheck %s
+; Test to check .debug_loc support. This test case emits many debug_loc entries.
+
+; CHECK: Loc expr size
+; CHECK-NEXT: DW_OP_reg
%0 = type { double }
diff --git a/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
new file mode 100644
index 0000000..e5542ba
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; This test exercises the alias checking in SimpleRegisterCoalescing::RemoveCopyByCommutingDef.
+
+define void @f(i32* %w, i32* %h, i8* %_this, i8* %image) nounwind ssp {
+ %x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3]
+ %tmp1 = trunc i64 %x1 to i32 ; <i32> [#uses=1]
+; CHECK: movl (%r{{.*}}), %
+ %x4 = load i32* %h, align 4 ; <i32> [#uses=1]
+
+; The imull clobbers a 32-bit register.
+; CHECK: imull %{{...}}, %e[[CLOBBER:..]]
+ %x5 = mul nsw i32 %x4, %tmp1 ; <i32> [#uses=1]
+
+; So we cannot use the corresponding 64-bit register anymore.
+; CHECK-NOT: shrq $32, %r[[CLOBBER]]
+ %btmp3 = lshr i64 %x1, 32 ; <i64> [#uses=1]
+ %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1]
+
+; CHECK: idiv
+ %x6 = sdiv i32 %x5, %btmp4 ; <i32> [#uses=1]
+ store i32 %x6, i32* %w, align 4
+ ret void
+}
+
+declare i64 @g(i8*, i8*)
diff --git a/test/CodeGen/X86/2010-09-16-asmcrash.ll b/test/CodeGen/X86/2010-09-16-asmcrash.ll
new file mode 100644
index 0000000..9bbd691
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-16-asmcrash.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-freebsd8.1 -o /dev/null
+; This formerly crashed, PR 8154.
+
+module asm ".weak sem_close"
+module asm ".equ sem_close, _sem_close"
+module asm ".weak sem_destroy"
+module asm ".equ sem_destroy, _sem_destroy"
+module asm ".weak sem_getvalue"
+module asm ".equ sem_getvalue, _sem_getvalue"
+module asm ".weak sem_init"
+module asm ".equ sem_init, _sem_init"
+module asm ".weak sem_open"
+module asm ".equ sem_open, _sem_open"
+module asm ".weak sem_post"
+module asm ".equ sem_post, _sem_post"
+module asm ".weak sem_timedwait"
+module asm ".equ sem_timedwait, _sem_timedwait"
+module asm ".weak sem_trywait"
+module asm ".equ sem_trywait, _sem_trywait"
+module asm ".weak sem_unlink"
+module asm ".equ sem_unlink, _sem_unlink"
+module asm ".weak sem_wait"
+module asm ".equ sem_wait, _sem_wait"
+
+%struct._sem = type { i32, %struct._usem }
+%struct._usem = type { i32, i32, i32 }
+
+define void @_sem_timedwait(%struct._sem* noalias %sem) nounwind ssp {
+entry:
+ br i1 undef, label %while.cond.preheader, label %sem_check_validity.exit
+
+while.cond.preheader: ; preds = %entry
+ %tmp4 = getelementptr inbounds %struct._sem* %sem, i64 0, i32 1, i32 1
+ br label %while.cond
+
+sem_check_validity.exit: ; preds = %entry
+ ret void
+
+while.cond: ; preds = %while.body, %while.cond.preheader
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %while.cond
+ %0 = call i8 asm sideeffect "\09lock ; \09\09\09cmpxchgl $2,$1 ;\09 sete\09$0 ;\09\091:\09\09\09\09# atomic_cmpset_int", "={ax},=*m,r,{ax},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %tmp4, i32 undef, i32 undef, i32* %tmp4) nounwind, !srcloc !0
+ br i1 undef, label %while.cond, label %return
+
+while.end: ; preds = %while.cond
+ br i1 undef, label %if.end18, label %return
+
+if.end18: ; preds = %while.end
+ unreachable
+
+return: ; preds = %while.end, %while.body
+ ret void
+}
+
+!0 = metadata !{i32 158484}
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
new file mode 100644
index 0000000..8fe0309
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -combiner-alias-analysis -march=x86-64 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.4"
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define fastcc i32 @cli_magic_scandesc(i8* %in) nounwind ssp {
+entry:
+ %a = alloca [64 x i8]
+ %b = getelementptr inbounds [64 x i8]* %a, i64 0, i32 0
+ %c = getelementptr inbounds [64 x i8]* %a, i64 0, i32 30
+ %d = load i8* %b, align 8
+ %e = load i8* %c, align 8
+ %f = bitcast [64 x i8]* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* %in, i64 64, i32 8, i1 false) nounwind
+ store i8 %d, i8* %b, align 8
+ store i8 %e, i8* %c, align 8
+ ret i32 0
+}
+
+; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
+; CHECK: movb 30(%rsp), %dl
+; CHECK: movb (%rsp), %sil
+; CHECK: movb %sil, (%rsp)
+; CHECK: movb %dl, 30(%rsp)
+; CHECK: callq ___stack_chk_fail
diff --git a/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
new file mode 100644
index 0000000..cae81d0
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
@@ -0,0 +1,71 @@
+; RUN: llc -verify-machineinstrs -cgp-critical-edge-splitting=0 -mcpu=i386 < %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+; The bb.i basic block gets split while emitting the schedule because
+; -mcpu=i386 doesn't have CMOV.'
+;
+; That causes the PHI to be updated wrong because the jumptable data structure is remembering the original MBB.
+;
+; -cgp-critical-edge-splitting=0 prevents the edge to PHI from being split.
+
+@.str146 = external constant [4 x i8], align 1
+@.str706 = external constant [4 x i8], align 1
+@.str1189 = external constant [5 x i8], align 1
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i32) nounwind readonly
+declare i32 @strlen(i8* nocapture) nounwind readonly
+
+define hidden zeroext i8 @f(i8* %this, i8* %Name.0, i32 %Name.1, i8* noalias %NameLoc, i8* %Operands) nounwind align 2 {
+bb.i:
+ %0 = icmp eq i8 undef, 0
+ %iftmp.285.0 = select i1 %0, i8* getelementptr inbounds ([5 x i8]* @.str1189, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str706, i32 0, i32 0)
+ %1 = call i32 @strlen(i8* %iftmp.285.0) nounwind readonly
+ switch i32 %Name.1, label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit [
+ i32 3, label %bb1.i
+ i32 4, label %bb1.i1237
+ i32 5, label %bb1.i1266
+ i32 6, label %bb1.i1275
+ i32 2, label %bb1.i1434
+ i32 8, label %bb1.i1523
+ i32 7, label %bb1.i1537
+ ]
+
+bb1.i: ; preds = %bb.i
+ unreachable
+
+bb1.i1237: ; preds = %bb.i
+ br i1 undef, label %bb.i1820, label %bb1.i1241
+
+bb1.i1241: ; preds = %bb1.i1237
+ unreachable
+
+bb1.i1266: ; preds = %bb.i
+ unreachable
+
+bb1.i1275: ; preds = %bb.i
+ unreachable
+
+bb1.i1434: ; preds = %bb.i
+ unreachable
+
+bb1.i1523: ; preds = %bb.i
+ unreachable
+
+bb1.i1537: ; preds = %bb.i
+ unreachable
+
+bb.i1820: ; preds = %bb1.i1237
+ br label %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+
+_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit: ; preds = %bb.i1820, %bb.i
+ %PatchedName.0.0 = phi i8* [ undef, %bb.i1820 ], [ %Name.0, %bb.i ]
+ br i1 undef, label %bb141, label %_ZNK4llvm9StringRef10startswithES0_.exit
+
+_ZNK4llvm9StringRef10startswithES0_.exit: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+ %2 = call i32 @memcmp(i8* %PatchedName.0.0, i8* getelementptr inbounds ([4 x i8]* @.str146, i32 0, i32 0), i32 3) nounwind readonly
+ unreachable
+
+bb141: ; preds = %_ZNK4llvm12StringSwitchINS_9StringRefES1_E7DefaultERKS1_.exit
+ unreachable
+}
diff --git a/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
new file mode 100644
index 0000000..40e7f01
--- /dev/null
+++ b/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin | FileCheck %s
+; PR8297
+;
+; On i386, i64 cmpxchg is lowered during legalize types to extract the
+; 64-bit result into a pair of fixed regs. So creation of the DAG node
+; happens in a different place. See
+; X86TargetLowering::ReplaceNodeResults, case ATOMIC_CMP_SWAP.
+;
+; Neither Atomic-xx.ll nor atomic_op.ll cover this. Those tests were
+; autogenerated from C source before 64-bit variants were supported.
+;
+; Note that this case requires a loop around the cmpxchg to force
+; machine licm to query alias anlysis, exposing a bad
+; MachineMemOperand.
+define void @foo(i64* %ptr) nounwind inlinehint {
+entry:
+ br label %loop
+loop:
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
+ %r = call i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* %ptr, i64 0, i64 1)
+ %stored1 = icmp eq i64 %r, 0
+ br i1 %stored1, label %loop, label %continue
+continue:
+ ret void
+}
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* nocapture, i64, i64) nounwind
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
new file mode 100644
index 0000000..79c0cf3
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -0,0 +1,35 @@
+; RUN: llc -O2 -asm-verbose < %s | FileCheck %s
+; Radar 8616981
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+%struct.bar = type { i32, i32 }
+
+define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp {
+; CHECK: TAG_formal_parameter
+entry:
+ tail call void @llvm.dbg.value(metadata !{%struct.bar* %i}, i64 0, metadata !6), !dbg !12
+ ret i32 1, !dbg !13
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.lv.foo = !{!6}
+
+!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 589865, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 117922)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0} ; [ DW_TAG_arg_variable ]
+!7 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589843, metadata !1, metadata !"bar", metadata !1, i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{metadata !10, metadata !11}
+!10 = metadata !{i32 589837, metadata !1, metadata !"x", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
+!11 = metadata !{i32 589837, metadata !1, metadata !"y", metadata !1, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
+!12 = metadata !{i32 3, i32 47, metadata !0, null}
+!13 = metadata !{i32 4, i32 2, metadata !14, null}
+!14 = metadata !{i32 589835, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-11-09-MOVLPS.ll b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
new file mode 100644
index 0000000..2368f3f
--- /dev/null
+++ b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -march=x86-64 -O0
+; PR8211
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.5.2 20100914 (prerelease) LLVM: 114628\22"
+
+%0 = type { %"int[]" }
+%float = type float
+%"float[]" = type [4 x float]
+%int = type i32
+%"int[]" = type [4 x i32]
+%"long unsigned int" = type i64
+
+define void @swizzle(i8* %a, %0* %b, %0* %c) nounwind {
+entry:
+ %a_addr = alloca i8*
+ %b_addr = alloca %0*
+ %c_addr = alloca %0*
+ %"alloca point" = bitcast i32 0 to i32
+ store i8* %a, i8** %a_addr
+ store %0* %b, %0** %b_addr
+ store %0* %c, %0** %c_addr
+ %0 = load i8** %a_addr, align 64
+ %1 = load %0** %b_addr, align 64
+ %2 = load %0** %c_addr, align 64
+ %"ssa point" = bitcast i32 0 to i32
+ br label %"2"
+
+"2": ; preds = %entry
+ %3 = bitcast i8* %0 to <2 x i32>*
+ %4 = getelementptr inbounds %0* %1, i32 0, i32 0
+ %5 = bitcast %"int[]"* %4 to <4 x float>*
+ %6 = load <4 x float>* %5, align 16
+ %7 = bitcast <2 x i32>* %3 to <2 x float>*
+ %8 = bitcast <2 x float>* %7 to double*
+ %9 = load double* %8
+ %10 = insertelement <2 x double> undef, double %9, i32 0
+ %11 = insertelement <2 x double> %10, double undef, i32 1
+ %12 = bitcast <2 x double> %11 to <4 x float>
+ %13 = shufflevector <4 x float> %6, <4 x float> %12, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %14 = getelementptr inbounds %0* %1, i32 0, i32 0
+ %15 = bitcast %"int[]"* %14 to <4 x float>*
+ store <4 x float> %13, <4 x float>* %15, align 16
+ %16 = bitcast i8* %0 to <2 x i32>*
+ %17 = bitcast <2 x i32>* %16 to i8*
+ %18 = getelementptr i8* %17, i64 8
+ %19 = bitcast i8* %18 to <2 x i32>*
+ %20 = getelementptr inbounds %0* %2, i32 0, i32 0
+ %21 = bitcast %"int[]"* %20 to <4 x float>*
+ %22 = load <4 x float>* %21, align 16
+ %23 = bitcast <2 x i32>* %19 to <2 x float>*
+ %24 = bitcast <2 x float>* %23 to double*
+ %25 = load double* %24
+ %26 = insertelement <2 x double> undef, double %25, i32 0
+ %27 = insertelement <2 x double> %26, double undef, i32 1
+ %28 = bitcast <2 x double> %27 to <4 x float>
+ %29 = shufflevector <4 x float> %22, <4 x float> %28, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ %30 = getelementptr inbounds %0* %2, i32 0, i32 0
+ %31 = bitcast %"int[]"* %30 to <4 x float>*
+ store <4 x float> %29, <4 x float>* %31, align 16
+ br label %return
+
+return: ; preds = %"2"
+ ret void
+}
diff --git a/test/CodeGen/X86/3addr-or.ll b/test/CodeGen/X86/3addr-or.ll
index 30a1f36..912bdc2 100644
--- a/test/CodeGen/X86/3addr-or.ll
+++ b/test/CodeGen/X86/3addr-or.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; rdar://7527734
-define i32 @test(i32 %x) nounwind readnone ssp {
+define i32 @test1(i32 %x) nounwind readnone ssp {
entry:
-; CHECK: test:
+; CHECK: test1:
; CHECK: leal 3(%rdi), %eax
%0 = shl i32 %x, 5 ; <i32> [#uses=1]
%1 = or i32 %0, 3 ; <i32> [#uses=1]
@@ -25,3 +25,37 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
%H = or i64 %G, %E ; <i64> [#uses=1]
ret i64 %H
}
+
+;; Test that OR is only emitted as LEA, not as ADD.
+
+define void @test3(i32 %x, i32* %P) nounwind readnone ssp {
+entry:
+; No reason to emit an add here, should be an or.
+; CHECK: test3:
+; CHECK: orl $3, %edi
+ %0 = shl i32 %x, 5
+ %1 = or i32 %0, 3
+ store i32 %1, i32* %P
+ ret void
+}
+
+define i32 @test4(i32 %a, i32 %b) nounwind readnone ssp {
+entry:
+ %and = and i32 %a, 6
+ %and2 = and i32 %b, 16
+ %or = or i32 %and2, %and
+ ret i32 %or
+; CHECK: test4:
+; CHECK: leal (%rsi,%rdi), %eax
+}
+
+define void @test5(i32 %a, i32 %b, i32* nocapture %P) nounwind ssp {
+entry:
+ %and = and i32 %a, 6
+ %and2 = and i32 %b, 16
+ %or = or i32 %and2, %and
+ store i32 %or, i32* %P, align 4
+ ret void
+; CHECK: test5:
+; CHECK: orl
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 23042b6..38868ff 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -72,7 +72,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo00:
-; DARWIN-32-PIC: call L0$pb
+; DARWIN-32-PIC: calll L0$pb
; DARWIN-32-PIC-NEXT: L0$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L0$pb(%eax), %ecx
@@ -144,7 +144,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo00:
-; DARWIN-32-PIC: call L1$pb
+; DARWIN-32-PIC: calll L1$pb
; DARWIN-32-PIC-NEXT: L1$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L1$pb(%eax), %ecx
@@ -208,7 +208,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo01:
-; DARWIN-32-PIC: call L2$pb
+; DARWIN-32-PIC: calll L2$pb
; DARWIN-32-PIC-NEXT: L2$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L2$pb(%eax), %ecx
@@ -268,7 +268,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo01:
-; DARWIN-32-PIC: call L3$pb
+; DARWIN-32-PIC: calll L3$pb
; DARWIN-32-PIC-NEXT: L3$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L3$pb(%eax), %ecx
@@ -342,7 +342,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo02:
-; DARWIN-32-PIC: call L4$pb
+; DARWIN-32-PIC: calll L4$pb
; DARWIN-32-PIC-NEXT: L4$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L4$pb(%eax), %ecx
@@ -424,7 +424,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _fxo02:
-; DARWIN-32-PIC: call L5$pb
+; DARWIN-32-PIC: calll L5$pb
; DARWIN-32-PIC-NEXT: L5$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L5$pb(%eax), %ecx
@@ -497,7 +497,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo03:
-; DARWIN-32-PIC: call L6$pb
+; DARWIN-32-PIC: calll L6$pb
; DARWIN-32-PIC-NEXT: L6$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dsrc-L6$pb(%eax), %ecx
@@ -551,7 +551,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo04:
-; DARWIN-32-PIC: call L7$pb
+; DARWIN-32-PIC: calll L7$pb
; DARWIN-32-PIC-NEXT: L7$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L7$pb(%eax), %ecx
@@ -619,7 +619,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo05:
-; DARWIN-32-PIC: call L8$pb
+; DARWIN-32-PIC: calll L8$pb
; DARWIN-32-PIC-NEXT: L8$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dsrc-L8$pb(%eax), %ecx
@@ -682,7 +682,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo06:
-; DARWIN-32-PIC: call L9$pb
+; DARWIN-32-PIC: calll L9$pb
; DARWIN-32-PIC-NEXT: L9$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lsrc-L9$pb(%eax), %ecx
@@ -735,7 +735,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo07:
-; DARWIN-32-PIC: call L10$pb
+; DARWIN-32-PIC: calll L10$pb
; DARWIN-32-PIC-NEXT: L10$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L10$pb(%eax), %ecx
@@ -801,7 +801,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _foo08:
-; DARWIN-32-PIC: call L11$pb
+; DARWIN-32-PIC: calll L11$pb
; DARWIN-32-PIC-NEXT: L11$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lsrc-L11$pb(%eax), %ecx
@@ -868,7 +868,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux00:
-; DARWIN-32-PIC: call L12$pb
+; DARWIN-32-PIC: calll L12$pb
; DARWIN-32-PIC-NEXT: L12$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L12$pb(%eax), %ecx
@@ -939,7 +939,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx00:
-; DARWIN-32-PIC: call L13$pb
+; DARWIN-32-PIC: calll L13$pb
; DARWIN-32-PIC-NEXT: L13$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L13$pb(%eax), %ecx
@@ -1005,7 +1005,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux01:
-; DARWIN-32-PIC: call L14$pb
+; DARWIN-32-PIC: calll L14$pb
; DARWIN-32-PIC-NEXT: L14$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L14$pb(%eax), %ecx
@@ -1071,7 +1071,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx01:
-; DARWIN-32-PIC: call L15$pb
+; DARWIN-32-PIC: calll L15$pb
; DARWIN-32-PIC-NEXT: L15$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L15$pb(%eax), %ecx
@@ -1150,7 +1150,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux02:
-; DARWIN-32-PIC: call L16$pb
+; DARWIN-32-PIC: calll L16$pb
; DARWIN-32-PIC-NEXT: L16$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L16$pb(%eax), %ecx
@@ -1233,7 +1233,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qxx02:
-; DARWIN-32-PIC: call L17$pb
+; DARWIN-32-PIC: calll L17$pb
; DARWIN-32-PIC-NEXT: L17$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L17$pb(%eax), %ecx
@@ -1306,7 +1306,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux03:
-; DARWIN-32-PIC: call L18$pb
+; DARWIN-32-PIC: calll L18$pb
; DARWIN-32-PIC-NEXT: L18$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L18$pb)+64(%eax), %ecx
@@ -1361,7 +1361,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux04:
-; DARWIN-32-PIC: call L19$pb
+; DARWIN-32-PIC: calll L19$pb
; DARWIN-32-PIC-NEXT: L19$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L19$pb)+64(%eax), %ecx
@@ -1430,7 +1430,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux05:
-; DARWIN-32-PIC: call L20$pb
+; DARWIN-32-PIC: calll L20$pb
; DARWIN-32-PIC-NEXT: L20$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L20$pb)+64(%eax), %ecx
@@ -1493,7 +1493,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux06:
-; DARWIN-32-PIC: call L21$pb
+; DARWIN-32-PIC: calll L21$pb
; DARWIN-32-PIC-NEXT: L21$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L21$pb)+64(%eax), %ecx
@@ -1546,7 +1546,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux07:
-; DARWIN-32-PIC: call L22$pb
+; DARWIN-32-PIC: calll L22$pb
; DARWIN-32-PIC-NEXT: L22$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L22$pb)+64(%eax), %ecx
@@ -1613,7 +1613,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _qux08:
-; DARWIN-32-PIC: call L23$pb
+; DARWIN-32-PIC: calll L23$pb
; DARWIN-32-PIC-NEXT: L23$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L23$pb)+64(%eax), %ecx
@@ -1686,7 +1686,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind00:
-; DARWIN-32-PIC: call L24$pb
+; DARWIN-32-PIC: calll L24$pb
; DARWIN-32-PIC-NEXT: L24$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1764,7 +1764,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd00:
-; DARWIN-32-PIC: call L25$pb
+; DARWIN-32-PIC: calll L25$pb
; DARWIN-32-PIC-NEXT: L25$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1840,7 +1840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind01:
-; DARWIN-32-PIC: call L26$pb
+; DARWIN-32-PIC: calll L26$pb
; DARWIN-32-PIC-NEXT: L26$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -1916,7 +1916,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd01:
-; DARWIN-32-PIC: call L27$pb
+; DARWIN-32-PIC: calll L27$pb
; DARWIN-32-PIC-NEXT: L27$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2001,7 +2001,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind02:
-; DARWIN-32-PIC: call L28$pb
+; DARWIN-32-PIC: calll L28$pb
; DARWIN-32-PIC-NEXT: L28$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2090,7 +2090,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ixd02:
-; DARWIN-32-PIC: call L29$pb
+; DARWIN-32-PIC: calll L29$pb
; DARWIN-32-PIC-NEXT: L29$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2170,7 +2170,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind03:
-; DARWIN-32-PIC: call L30$pb
+; DARWIN-32-PIC: calll L30$pb
; DARWIN-32-PIC-NEXT: L30$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2242,7 +2242,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind04:
-; DARWIN-32-PIC: call L31$pb
+; DARWIN-32-PIC: calll L31$pb
; DARWIN-32-PIC-NEXT: L31$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2320,7 +2320,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind05:
-; DARWIN-32-PIC: call L32$pb
+; DARWIN-32-PIC: calll L32$pb
; DARWIN-32-PIC-NEXT: L32$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2395,7 +2395,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind06:
-; DARWIN-32-PIC: call L33$pb
+; DARWIN-32-PIC: calll L33$pb
; DARWIN-32-PIC-NEXT: L33$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2466,7 +2466,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind07:
-; DARWIN-32-PIC: call L34$pb
+; DARWIN-32-PIC: calll L34$pb
; DARWIN-32-PIC-NEXT: L34$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2543,7 +2543,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ind08:
-; DARWIN-32-PIC: call L35$pb
+; DARWIN-32-PIC: calll L35$pb
; DARWIN-32-PIC-NEXT: L35$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2621,7 +2621,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off00:
-; DARWIN-32-PIC: call L36$pb
+; DARWIN-32-PIC: calll L36$pb
; DARWIN-32-PIC-NEXT: L36$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2700,7 +2700,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf00:
-; DARWIN-32-PIC: call L37$pb
+; DARWIN-32-PIC: calll L37$pb
; DARWIN-32-PIC-NEXT: L37$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2777,7 +2777,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off01:
-; DARWIN-32-PIC: call L38$pb
+; DARWIN-32-PIC: calll L38$pb
; DARWIN-32-PIC-NEXT: L38$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2854,7 +2854,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf01:
-; DARWIN-32-PIC: call L39$pb
+; DARWIN-32-PIC: calll L39$pb
; DARWIN-32-PIC-NEXT: L39$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -2940,7 +2940,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off02:
-; DARWIN-32-PIC: call L40$pb
+; DARWIN-32-PIC: calll L40$pb
; DARWIN-32-PIC-NEXT: L40$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3030,7 +3030,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _oxf02:
-; DARWIN-32-PIC: call L41$pb
+; DARWIN-32-PIC: calll L41$pb
; DARWIN-32-PIC-NEXT: L41$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3111,7 +3111,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off03:
-; DARWIN-32-PIC: call L42$pb
+; DARWIN-32-PIC: calll L42$pb
; DARWIN-32-PIC-NEXT: L42$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3184,7 +3184,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off04:
-; DARWIN-32-PIC: call L43$pb
+; DARWIN-32-PIC: calll L43$pb
; DARWIN-32-PIC-NEXT: L43$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3263,7 +3263,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off05:
-; DARWIN-32-PIC: call L44$pb
+; DARWIN-32-PIC: calll L44$pb
; DARWIN-32-PIC-NEXT: L44$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3339,7 +3339,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off06:
-; DARWIN-32-PIC: call L45$pb
+; DARWIN-32-PIC: calll L45$pb
; DARWIN-32-PIC-NEXT: L45$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3411,7 +3411,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off07:
-; DARWIN-32-PIC: call L46$pb
+; DARWIN-32-PIC: calll L46$pb
; DARWIN-32-PIC-NEXT: L46$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3489,7 +3489,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _off08:
-; DARWIN-32-PIC: call L47$pb
+; DARWIN-32-PIC: calll L47$pb
; DARWIN-32-PIC-NEXT: L47$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -3560,7 +3560,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo00:
-; DARWIN-32-PIC: call L48$pb
+; DARWIN-32-PIC: calll L48$pb
; DARWIN-32-PIC-NEXT: L48$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L48$pb(%eax), %ecx
@@ -3626,7 +3626,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo01:
-; DARWIN-32-PIC: call L49$pb
+; DARWIN-32-PIC: calll L49$pb
; DARWIN-32-PIC-NEXT: L49$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl $262144, %ecx
@@ -3705,7 +3705,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo02:
-; DARWIN-32-PIC: call L50$pb
+; DARWIN-32-PIC: calll L50$pb
; DARWIN-32-PIC-NEXT: L50$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L50$pb(%eax), %ecx
@@ -3778,7 +3778,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo03:
-; DARWIN-32-PIC: call L51$pb
+; DARWIN-32-PIC: calll L51$pb
; DARWIN-32-PIC-NEXT: L51$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L51$pb)+262144(%eax), %ecx
@@ -3833,7 +3833,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo04:
-; DARWIN-32-PIC: call L52$pb
+; DARWIN-32-PIC: calll L52$pb
; DARWIN-32-PIC-NEXT: L52$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L52$pb)+262144(%eax), %ecx
@@ -3902,7 +3902,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo05:
-; DARWIN-32-PIC: call L53$pb
+; DARWIN-32-PIC: calll L53$pb
; DARWIN-32-PIC-NEXT: L53$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_dsrc-L53$pb)+262144(%eax), %ecx
@@ -3965,7 +3965,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo06:
-; DARWIN-32-PIC: call L54$pb
+; DARWIN-32-PIC: calll L54$pb
; DARWIN-32-PIC-NEXT: L54$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L54$pb)+262144(%eax), %ecx
@@ -4018,7 +4018,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo07:
-; DARWIN-32-PIC: call L55$pb
+; DARWIN-32-PIC: calll L55$pb
; DARWIN-32-PIC-NEXT: L55$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L55$pb)+262144(%eax), %ecx
@@ -4085,7 +4085,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _moo08:
-; DARWIN-32-PIC: call L56$pb
+; DARWIN-32-PIC: calll L56$pb
; DARWIN-32-PIC-NEXT: L56$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl (_lsrc-L56$pb)+262144(%eax), %ecx
@@ -4159,7 +4159,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big00:
-; DARWIN-32-PIC: call L57$pb
+; DARWIN-32-PIC: calll L57$pb
; DARWIN-32-PIC-NEXT: L57$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4236,7 +4236,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big01:
-; DARWIN-32-PIC: call L58$pb
+; DARWIN-32-PIC: calll L58$pb
; DARWIN-32-PIC-NEXT: L58$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4322,7 +4322,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big02:
-; DARWIN-32-PIC: call L59$pb
+; DARWIN-32-PIC: calll L59$pb
; DARWIN-32-PIC-NEXT: L59$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4403,7 +4403,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big03:
-; DARWIN-32-PIC: call L60$pb
+; DARWIN-32-PIC: calll L60$pb
; DARWIN-32-PIC-NEXT: L60$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4476,7 +4476,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big04:
-; DARWIN-32-PIC: call L61$pb
+; DARWIN-32-PIC: calll L61$pb
; DARWIN-32-PIC-NEXT: L61$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4555,7 +4555,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big05:
-; DARWIN-32-PIC: call L62$pb
+; DARWIN-32-PIC: calll L62$pb
; DARWIN-32-PIC-NEXT: L62$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4631,7 +4631,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big06:
-; DARWIN-32-PIC: call L63$pb
+; DARWIN-32-PIC: calll L63$pb
; DARWIN-32-PIC-NEXT: L63$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4703,7 +4703,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big07:
-; DARWIN-32-PIC: call L64$pb
+; DARWIN-32-PIC: calll L64$pb
; DARWIN-32-PIC-NEXT: L64$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4781,7 +4781,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _big08:
-; DARWIN-32-PIC: call L65$pb
+; DARWIN-32-PIC: calll L65$pb
; DARWIN-32-PIC-NEXT: L65$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -4840,7 +4840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar00:
-; DARWIN-32-PIC: call L66$pb
+; DARWIN-32-PIC: calll L66$pb
; DARWIN-32-PIC-NEXT: L66$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L66$pb(%eax), %eax
@@ -4887,7 +4887,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxr00:
-; DARWIN-32-PIC: call L67$pb
+; DARWIN-32-PIC: calll L67$pb
; DARWIN-32-PIC-NEXT: L67$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L67$pb(%eax), %eax
@@ -4934,7 +4934,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar01:
-; DARWIN-32-PIC: call L68$pb
+; DARWIN-32-PIC: calll L68$pb
; DARWIN-32-PIC-NEXT: L68$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L68$pb(%eax), %eax
@@ -4981,7 +4981,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxr01:
-; DARWIN-32-PIC: call L69$pb
+; DARWIN-32-PIC: calll L69$pb
; DARWIN-32-PIC-NEXT: L69$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L69$pb(%eax), %eax
@@ -5028,7 +5028,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar02:
-; DARWIN-32-PIC: call L70$pb
+; DARWIN-32-PIC: calll L70$pb
; DARWIN-32-PIC-NEXT: L70$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L70$pb(%eax), %eax
@@ -5075,7 +5075,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar03:
-; DARWIN-32-PIC: call L71$pb
+; DARWIN-32-PIC: calll L71$pb
; DARWIN-32-PIC-NEXT: L71$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dsrc-L71$pb(%eax), %eax
@@ -5122,7 +5122,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar04:
-; DARWIN-32-PIC: call L72$pb
+; DARWIN-32-PIC: calll L72$pb
; DARWIN-32-PIC-NEXT: L72$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L72$pb(%eax), %eax
@@ -5169,7 +5169,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar05:
-; DARWIN-32-PIC: call L73$pb
+; DARWIN-32-PIC: calll L73$pb
; DARWIN-32-PIC-NEXT: L73$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dptr-L73$pb(%eax), %eax
@@ -5216,7 +5216,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar06:
-; DARWIN-32-PIC: call L74$pb
+; DARWIN-32-PIC: calll L74$pb
; DARWIN-32-PIC-NEXT: L74$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lsrc-L74$pb(%eax), %eax
@@ -5263,7 +5263,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar07:
-; DARWIN-32-PIC: call L75$pb
+; DARWIN-32-PIC: calll L75$pb
; DARWIN-32-PIC-NEXT: L75$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L75$pb(%eax), %eax
@@ -5310,7 +5310,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bar08:
-; DARWIN-32-PIC: call L76$pb
+; DARWIN-32-PIC: calll L76$pb
; DARWIN-32-PIC-NEXT: L76$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lptr-L76$pb(%eax), %eax
@@ -5357,7 +5357,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har00:
-; DARWIN-32-PIC: call L77$pb
+; DARWIN-32-PIC: calll L77$pb
; DARWIN-32-PIC-NEXT: L77$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L77$pb(%eax), %eax
@@ -5404,7 +5404,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _hxr00:
-; DARWIN-32-PIC: call L78$pb
+; DARWIN-32-PIC: calll L78$pb
; DARWIN-32-PIC-NEXT: L78$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L78$pb(%eax), %eax
@@ -5451,7 +5451,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har01:
-; DARWIN-32-PIC: call L79$pb
+; DARWIN-32-PIC: calll L79$pb
; DARWIN-32-PIC-NEXT: L79$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L79$pb(%eax), %eax
@@ -5498,7 +5498,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _hxr01:
-; DARWIN-32-PIC: call L80$pb
+; DARWIN-32-PIC: calll L80$pb
; DARWIN-32-PIC-NEXT: L80$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L80$pb(%eax), %eax
@@ -5549,7 +5549,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har02:
-; DARWIN-32-PIC: call L81$pb
+; DARWIN-32-PIC: calll L81$pb
; DARWIN-32-PIC-NEXT: L81$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L81$pb(%eax), %eax
@@ -5600,7 +5600,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har03:
-; DARWIN-32-PIC: call L82$pb
+; DARWIN-32-PIC: calll L82$pb
; DARWIN-32-PIC-NEXT: L82$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dsrc-L82$pb(%eax), %eax
@@ -5647,7 +5647,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har04:
-; DARWIN-32-PIC: call L83$pb
+; DARWIN-32-PIC: calll L83$pb
; DARWIN-32-PIC-NEXT: L83$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ddst-L83$pb(%eax), %eax
@@ -5697,7 +5697,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har05:
-; DARWIN-32-PIC: call L84$pb
+; DARWIN-32-PIC: calll L84$pb
; DARWIN-32-PIC-NEXT: L84$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dptr-L84$pb(%eax), %eax
@@ -5744,7 +5744,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har06:
-; DARWIN-32-PIC: call L85$pb
+; DARWIN-32-PIC: calll L85$pb
; DARWIN-32-PIC-NEXT: L85$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lsrc-L85$pb(%eax), %eax
@@ -5791,7 +5791,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har07:
-; DARWIN-32-PIC: call L86$pb
+; DARWIN-32-PIC: calll L86$pb
; DARWIN-32-PIC-NEXT: L86$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _ldst-L86$pb(%eax), %eax
@@ -5840,7 +5840,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _har08:
-; DARWIN-32-PIC: call L87$pb
+; DARWIN-32-PIC: calll L87$pb
; DARWIN-32-PIC-NEXT: L87$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lptr-L87$pb(%eax), %eax
@@ -5889,7 +5889,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat00:
-; DARWIN-32-PIC: call L88$pb
+; DARWIN-32-PIC: calll L88$pb
; DARWIN-32-PIC-NEXT: L88$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_src$non_lazy_ptr-L88$pb(%eax), %eax
@@ -5942,7 +5942,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxt00:
-; DARWIN-32-PIC: call L89$pb
+; DARWIN-32-PIC: calll L89$pb
; DARWIN-32-PIC-NEXT: L89$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xsrc$non_lazy_ptr-L89$pb(%eax), %eax
@@ -5995,7 +5995,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat01:
-; DARWIN-32-PIC: call L90$pb
+; DARWIN-32-PIC: calll L90$pb
; DARWIN-32-PIC-NEXT: L90$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_dst$non_lazy_ptr-L90$pb(%eax), %eax
@@ -6048,7 +6048,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxt01:
-; DARWIN-32-PIC: call L91$pb
+; DARWIN-32-PIC: calll L91$pb
; DARWIN-32-PIC-NEXT: L91$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_xdst$non_lazy_ptr-L91$pb(%eax), %eax
@@ -6110,7 +6110,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat02:
-; DARWIN-32-PIC: call L92$pb
+; DARWIN-32-PIC: calll L92$pb
; DARWIN-32-PIC-NEXT: L92$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L92$pb(%eax), %eax
@@ -6166,7 +6166,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat03:
-; DARWIN-32-PIC: call L93$pb
+; DARWIN-32-PIC: calll L93$pb
; DARWIN-32-PIC-NEXT: L93$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_dsrc-L93$pb)+64(%eax), %eax
@@ -6214,7 +6214,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat04:
-; DARWIN-32-PIC: call L94$pb
+; DARWIN-32-PIC: calll L94$pb
; DARWIN-32-PIC-NEXT: L94$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L94$pb)+64(%eax), %eax
@@ -6271,7 +6271,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat05:
-; DARWIN-32-PIC: call L95$pb
+; DARWIN-32-PIC: calll L95$pb
; DARWIN-32-PIC-NEXT: L95$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _dptr-L95$pb(%eax), %eax
@@ -6322,7 +6322,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat06:
-; DARWIN-32-PIC: call L96$pb
+; DARWIN-32-PIC: calll L96$pb
; DARWIN-32-PIC-NEXT: L96$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_lsrc-L96$pb)+64(%eax), %eax
@@ -6369,7 +6369,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat07:
-; DARWIN-32-PIC: call L97$pb
+; DARWIN-32-PIC: calll L97$pb
; DARWIN-32-PIC-NEXT: L97$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L97$pb)+64(%eax), %eax
@@ -6425,7 +6425,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bat08:
-; DARWIN-32-PIC: call L98$pb
+; DARWIN-32-PIC: calll L98$pb
; DARWIN-32-PIC-NEXT: L98$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl _lptr-L98$pb(%eax), %eax
@@ -6478,7 +6478,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam00:
-; DARWIN-32-PIC: call L99$pb
+; DARWIN-32-PIC: calll L99$pb
; DARWIN-32-PIC-NEXT: L99$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6531,7 +6531,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam01:
-; DARWIN-32-PIC: call L100$pb
+; DARWIN-32-PIC: calll L100$pb
; DARWIN-32-PIC-NEXT: L100$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6584,7 +6584,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bxm01:
-; DARWIN-32-PIC: call L101$pb
+; DARWIN-32-PIC: calll L101$pb
; DARWIN-32-PIC-NEXT: L101$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6646,7 +6646,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam02:
-; DARWIN-32-PIC: call L102$pb
+; DARWIN-32-PIC: calll L102$pb
; DARWIN-32-PIC-NEXT: L102$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L102$pb(%eax), %ecx
@@ -6702,7 +6702,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam03:
-; DARWIN-32-PIC: call L103$pb
+; DARWIN-32-PIC: calll L103$pb
; DARWIN-32-PIC-NEXT: L103$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_dsrc-L103$pb)+262144(%eax), %eax
@@ -6750,7 +6750,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam04:
-; DARWIN-32-PIC: call L104$pb
+; DARWIN-32-PIC: calll L104$pb
; DARWIN-32-PIC-NEXT: L104$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ddst-L104$pb)+262144(%eax), %eax
@@ -6807,7 +6807,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam05:
-; DARWIN-32-PIC: call L105$pb
+; DARWIN-32-PIC: calll L105$pb
; DARWIN-32-PIC-NEXT: L105$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -6858,7 +6858,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam06:
-; DARWIN-32-PIC: call L106$pb
+; DARWIN-32-PIC: calll L106$pb
; DARWIN-32-PIC-NEXT: L106$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_lsrc-L106$pb)+262144(%eax), %eax
@@ -6905,7 +6905,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam07:
-; DARWIN-32-PIC: call L107$pb
+; DARWIN-32-PIC: calll L107$pb
; DARWIN-32-PIC-NEXT: L107$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal (_ldst-L107$pb)+262144(%eax), %eax
@@ -6961,7 +6961,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _bam08:
-; DARWIN-32-PIC: call L108$pb
+; DARWIN-32-PIC: calll L108$pb
; DARWIN-32-PIC-NEXT: L108$pb:
; DARWIN-32-PIC-NEXT: popl %ecx
; DARWIN-32-PIC-NEXT: movl $262144, %eax
@@ -7021,7 +7021,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat00:
-; DARWIN-32-PIC: call L109$pb
+; DARWIN-32-PIC: calll L109$pb
; DARWIN-32-PIC-NEXT: L109$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7082,7 +7082,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxt00:
-; DARWIN-32-PIC: call L110$pb
+; DARWIN-32-PIC: calll L110$pb
; DARWIN-32-PIC-NEXT: L110$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7143,7 +7143,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat01:
-; DARWIN-32-PIC: call L111$pb
+; DARWIN-32-PIC: calll L111$pb
; DARWIN-32-PIC-NEXT: L111$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7204,7 +7204,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxt01:
-; DARWIN-32-PIC: call L112$pb
+; DARWIN-32-PIC: calll L112$pb
; DARWIN-32-PIC-NEXT: L112$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7272,7 +7272,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat02:
-; DARWIN-32-PIC: call L113$pb
+; DARWIN-32-PIC: calll L113$pb
; DARWIN-32-PIC-NEXT: L113$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L113$pb(%eax), %eax
@@ -7336,7 +7336,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat03:
-; DARWIN-32-PIC: call L114$pb
+; DARWIN-32-PIC: calll L114$pb
; DARWIN-32-PIC-NEXT: L114$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7395,7 +7395,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat04:
-; DARWIN-32-PIC: call L115$pb
+; DARWIN-32-PIC: calll L115$pb
; DARWIN-32-PIC-NEXT: L115$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7461,7 +7461,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat05:
-; DARWIN-32-PIC: call L116$pb
+; DARWIN-32-PIC: calll L116$pb
; DARWIN-32-PIC-NEXT: L116$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7521,7 +7521,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat06:
-; DARWIN-32-PIC: call L117$pb
+; DARWIN-32-PIC: calll L117$pb
; DARWIN-32-PIC-NEXT: L117$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7580,7 +7580,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat07:
-; DARWIN-32-PIC: call L118$pb
+; DARWIN-32-PIC: calll L118$pb
; DARWIN-32-PIC-NEXT: L118$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7645,7 +7645,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cat08:
-; DARWIN-32-PIC: call L119$pb
+; DARWIN-32-PIC: calll L119$pb
; DARWIN-32-PIC-NEXT: L119$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7706,7 +7706,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam00:
-; DARWIN-32-PIC: call L120$pb
+; DARWIN-32-PIC: calll L120$pb
; DARWIN-32-PIC-NEXT: L120$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7767,7 +7767,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxm00:
-; DARWIN-32-PIC: call L121$pb
+; DARWIN-32-PIC: calll L121$pb
; DARWIN-32-PIC-NEXT: L121$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7828,7 +7828,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam01:
-; DARWIN-32-PIC: call L122$pb
+; DARWIN-32-PIC: calll L122$pb
; DARWIN-32-PIC-NEXT: L122$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7889,7 +7889,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cxm01:
-; DARWIN-32-PIC: call L123$pb
+; DARWIN-32-PIC: calll L123$pb
; DARWIN-32-PIC-NEXT: L123$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -7957,7 +7957,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam02:
-; DARWIN-32-PIC: call L124$pb
+; DARWIN-32-PIC: calll L124$pb
; DARWIN-32-PIC-NEXT: L124$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ptr$non_lazy_ptr-L124$pb(%eax), %eax
@@ -8021,7 +8021,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam03:
-; DARWIN-32-PIC: call L125$pb
+; DARWIN-32-PIC: calll L125$pb
; DARWIN-32-PIC-NEXT: L125$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8080,7 +8080,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam04:
-; DARWIN-32-PIC: call L126$pb
+; DARWIN-32-PIC: calll L126$pb
; DARWIN-32-PIC-NEXT: L126$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8146,7 +8146,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam05:
-; DARWIN-32-PIC: call L127$pb
+; DARWIN-32-PIC: calll L127$pb
; DARWIN-32-PIC-NEXT: L127$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8206,7 +8206,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam06:
-; DARWIN-32-PIC: call L128$pb
+; DARWIN-32-PIC: calll L128$pb
; DARWIN-32-PIC-NEXT: L128$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8265,7 +8265,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam07:
-; DARWIN-32-PIC: call L129$pb
+; DARWIN-32-PIC: calll L129$pb
; DARWIN-32-PIC-NEXT: L129$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8330,7 +8330,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _cam08:
-; DARWIN-32-PIC: call L130$pb
+; DARWIN-32-PIC: calll L130$pb
; DARWIN-32-PIC-NEXT: L130$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl 4(%esp), %ecx
@@ -8376,25 +8376,25 @@ entry:
; LINUX-32-STATIC: lcallee:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
-; LINUX-32-STATIC-NEXT: call x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
+; LINUX-32-STATIC-NEXT: calll x
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: lcallee:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
-; LINUX-32-PIC-NEXT: call x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
+; LINUX-32-PIC-NEXT: calll x
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -8412,37 +8412,37 @@ entry:
; DARWIN-32-STATIC: _lcallee:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
-; DARWIN-32-STATIC-NEXT: call _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
+; DARWIN-32-STATIC-NEXT: calll _x
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _lcallee:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_x$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _lcallee:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
-; DARWIN-32-PIC-NEXT: call L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
+; DARWIN-32-PIC-NEXT: calll L_x$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -8507,25 +8507,25 @@ entry:
; LINUX-32-STATIC: dcallee:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
-; LINUX-32-STATIC-NEXT: call y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
+; LINUX-32-STATIC-NEXT: calll y
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dcallee:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
-; LINUX-32-PIC-NEXT: call y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
+; LINUX-32-PIC-NEXT: calll y
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -8543,37 +8543,37 @@ entry:
; DARWIN-32-STATIC: _dcallee:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
-; DARWIN-32-STATIC-NEXT: call _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
+; DARWIN-32-STATIC-NEXT: calll _y
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dcallee:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_y$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dcallee:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
-; DARWIN-32-PIC-NEXT: call L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
+; DARWIN-32-PIC-NEXT: calll L_y$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -8644,7 +8644,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _address:
-; DARWIN-32-PIC: call L133$pb
+; DARWIN-32-PIC: calll L133$pb
; DARWIN-32-PIC-NEXT: L133$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_callee$non_lazy_ptr-L133$pb(%eax), %eax
@@ -8693,7 +8693,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _laddress:
-; DARWIN-32-PIC: call L134$pb
+; DARWIN-32-PIC: calll L134$pb
; DARWIN-32-PIC-NEXT: L134$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _lcallee-L134$pb(%eax), %eax
@@ -8740,7 +8740,7 @@ entry:
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _daddress:
-; DARWIN-32-PIC: call L135$pb
+; DARWIN-32-PIC: calll L135$pb
; DARWIN-32-PIC-NEXT: L135$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: leal _dcallee-L135$pb(%eax), %eax
@@ -8771,15 +8771,15 @@ entry:
; LINUX-32-STATIC: caller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call callee
-; LINUX-32-STATIC-NEXT: call callee
+; LINUX-32-STATIC-NEXT: calll callee
+; LINUX-32-STATIC-NEXT: calll callee
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: caller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call callee
-; LINUX-32-PIC-NEXT: call callee
+; LINUX-32-PIC-NEXT: calll callee
+; LINUX-32-PIC-NEXT: calll callee
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -8792,22 +8792,22 @@ entry:
; DARWIN-32-STATIC: _caller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _callee
-; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _caller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _caller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_callee$stub
-; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -8845,15 +8845,15 @@ entry:
; LINUX-32-STATIC: dcaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call dcallee
-; LINUX-32-STATIC-NEXT: call dcallee
+; LINUX-32-STATIC-NEXT: calll dcallee
+; LINUX-32-STATIC-NEXT: calll dcallee
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dcaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call dcallee
-; LINUX-32-PIC-NEXT: call dcallee
+; LINUX-32-PIC-NEXT: calll dcallee
+; LINUX-32-PIC-NEXT: calll dcallee
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -8866,22 +8866,22 @@ entry:
; DARWIN-32-STATIC: _dcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _dcallee
-; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _dcallee
-; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -8919,15 +8919,15 @@ entry:
; LINUX-32-STATIC: lcaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call lcallee
-; LINUX-32-STATIC-NEXT: call lcallee
+; LINUX-32-STATIC-NEXT: calll lcallee
+; LINUX-32-STATIC-NEXT: calll lcallee
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: lcaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call lcallee
-; LINUX-32-PIC-NEXT: call lcallee
+; LINUX-32-PIC-NEXT: calll lcallee
+; LINUX-32-PIC-NEXT: calll lcallee
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -8940,22 +8940,22 @@ entry:
; DARWIN-32-STATIC: _lcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _lcallee
-; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _lcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _lcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _lcallee
-; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -8991,13 +8991,13 @@ entry:
; LINUX-32-STATIC: tailcaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call callee
+; LINUX-32-STATIC-NEXT: calll callee
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: tailcaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call callee
+; LINUX-32-PIC-NEXT: calll callee
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9009,19 +9009,19 @@ entry:
; DARWIN-32-STATIC: _tailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _callee
+; DARWIN-32-STATIC-NEXT: calll _callee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _tailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call L_callee$stub
+; DARWIN-32-DYNAMIC-NEXT: calll L_callee$stub
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _tailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L_callee$stub
+; DARWIN-32-PIC-NEXT: calll L_callee$stub
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -9054,13 +9054,13 @@ entry:
; LINUX-32-STATIC: dtailcaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call dcallee
+; LINUX-32-STATIC-NEXT: calll dcallee
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dtailcaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call dcallee
+; LINUX-32-PIC-NEXT: calll dcallee
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9072,19 +9072,19 @@ entry:
; DARWIN-32-STATIC: _dtailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _dcallee
+; DARWIN-32-STATIC-NEXT: calll _dcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dtailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _dcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _dcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dtailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _dcallee
+; DARWIN-32-PIC-NEXT: calll _dcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -9117,13 +9117,13 @@ entry:
; LINUX-32-STATIC: ltailcaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call lcallee
+; LINUX-32-STATIC-NEXT: calll lcallee
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: ltailcaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call lcallee
+; LINUX-32-PIC-NEXT: calll lcallee
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9135,19 +9135,19 @@ entry:
; DARWIN-32-STATIC: _ltailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call _lcallee
+; DARWIN-32-STATIC-NEXT: calll _lcallee
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _ltailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call _lcallee
+; DARWIN-32-DYNAMIC-NEXT: calll _lcallee
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ltailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call _lcallee
+; DARWIN-32-PIC-NEXT: calll _lcallee
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -9184,15 +9184,15 @@ entry:
; LINUX-32-STATIC: icaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: calll *ifunc
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: icaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: calll *ifunc
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9206,8 +9206,8 @@ entry:
; DARWIN-32-STATIC: _icaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_ifunc
-; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
@@ -9215,8 +9215,8 @@ entry:
; DARWIN-32-DYNAMIC: pushl %esi
; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: popl %esi
; DARWIN-32-DYNAMIC-NEXT: ret
@@ -9224,12 +9224,12 @@ entry:
; DARWIN-32-PIC: _icaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L142$pb
+; DARWIN-32-PIC-NEXT: calll L142$pb
; DARWIN-32-PIC-NEXT: L142$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L142$pb(%eax), %esi
-; DARWIN-32-PIC-NEXT: call *(%esi)
-; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9273,15 +9273,15 @@ entry:
; LINUX-32-STATIC: dicaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *difunc
-; LINUX-32-STATIC-NEXT: call *difunc
+; LINUX-32-STATIC-NEXT: calll *difunc
+; LINUX-32-STATIC-NEXT: calll *difunc
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: dicaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *difunc
-; LINUX-32-PIC-NEXT: call *difunc
+; LINUX-32-PIC-NEXT: calll *difunc
+; LINUX-32-PIC-NEXT: calll *difunc
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9295,26 +9295,26 @@ entry:
; DARWIN-32-STATIC: _dicaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_difunc
-; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _dicaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _dicaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L143$pb
+; DARWIN-32-PIC-NEXT: calll L143$pb
; DARWIN-32-PIC-NEXT: L143$pb:
; DARWIN-32-PIC-NEXT: popl %esi
-; DARWIN-32-PIC-NEXT: call *_difunc-L143$pb(%esi)
-; DARWIN-32-PIC-NEXT: call *_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L143$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L143$pb(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9355,15 +9355,15 @@ entry:
; LINUX-32-STATIC: licaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *lifunc
-; LINUX-32-STATIC-NEXT: call *lifunc
+; LINUX-32-STATIC-NEXT: calll *lifunc
+; LINUX-32-STATIC-NEXT: calll *lifunc
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: licaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *lifunc
-; LINUX-32-PIC-NEXT: call *lifunc
+; LINUX-32-PIC-NEXT: calll *lifunc
+; LINUX-32-PIC-NEXT: calll *lifunc
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9376,26 +9376,26 @@ entry:
; DARWIN-32-STATIC: _licaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_lifunc
-; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _licaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _licaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L144$pb
+; DARWIN-32-PIC-NEXT: calll L144$pb
; DARWIN-32-PIC-NEXT: L144$pb:
; DARWIN-32-PIC-NEXT: popl %esi
-; DARWIN-32-PIC-NEXT: call *_lifunc-L144$pb(%esi)
-; DARWIN-32-PIC-NEXT: call *_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L144$pb(%esi)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L144$pb(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9436,15 +9436,15 @@ entry:
; LINUX-32-STATIC: itailcaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *ifunc
-; LINUX-32-STATIC-NEXT: call *ifunc
+; LINUX-32-STATIC-NEXT: calll *ifunc
+; LINUX-32-STATIC-NEXT: calll *ifunc
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: itailcaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *ifunc
-; LINUX-32-PIC-NEXT: call *ifunc
+; LINUX-32-PIC-NEXT: calll *ifunc
+; LINUX-32-PIC-NEXT: calll *ifunc
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9458,8 +9458,8 @@ entry:
; DARWIN-32-STATIC: _itailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_ifunc
-; DARWIN-32-STATIC-NEXT: call *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
+; DARWIN-32-STATIC-NEXT: calll *_ifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
@@ -9467,8 +9467,8 @@ entry:
; DARWIN-32-DYNAMIC: pushl %esi
; DARWIN-32-DYNAMIC-NEXT: subl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: movl L_ifunc$non_lazy_ptr, %esi
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
-; DARWIN-32-DYNAMIC-NEXT: call *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
+; DARWIN-32-DYNAMIC-NEXT: calll *(%esi)
; DARWIN-32-DYNAMIC-NEXT: addl $8, %esp
; DARWIN-32-DYNAMIC-NEXT: popl %esi
; DARWIN-32-DYNAMIC-NEXT: ret
@@ -9476,12 +9476,12 @@ entry:
; DARWIN-32-PIC: _itailcaller:
; DARWIN-32-PIC: pushl %esi
; DARWIN-32-PIC-NEXT: subl $8, %esp
-; DARWIN-32-PIC-NEXT: call L145$pb
+; DARWIN-32-PIC-NEXT: calll L145$pb
; DARWIN-32-PIC-NEXT: L145$pb:
; DARWIN-32-PIC-NEXT: popl %eax
; DARWIN-32-PIC-NEXT: movl L_ifunc$non_lazy_ptr-L145$pb(%eax), %esi
-; DARWIN-32-PIC-NEXT: call *(%esi)
-; DARWIN-32-PIC-NEXT: call *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
+; DARWIN-32-PIC-NEXT: calll *(%esi)
; DARWIN-32-PIC-NEXT: addl $8, %esp
; DARWIN-32-PIC-NEXT: popl %esi
; DARWIN-32-PIC-NEXT: ret
@@ -9522,13 +9522,13 @@ entry:
; LINUX-32-STATIC: ditailcaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *difunc
+; LINUX-32-STATIC-NEXT: calll *difunc
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: ditailcaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *difunc
+; LINUX-32-PIC-NEXT: calll *difunc
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9541,22 +9541,22 @@ entry:
; DARWIN-32-STATIC: _ditailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_difunc
+; DARWIN-32-STATIC-NEXT: calll *_difunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _ditailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_difunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_difunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _ditailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L146$pb
+; DARWIN-32-PIC-NEXT: calll L146$pb
; DARWIN-32-PIC-NEXT: L146$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: call *_difunc-L146$pb(%eax)
+; DARWIN-32-PIC-NEXT: calll *_difunc-L146$pb(%eax)
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
@@ -9589,13 +9589,13 @@ entry:
; LINUX-32-STATIC: litailcaller:
; LINUX-32-STATIC: subl $4, %esp
-; LINUX-32-STATIC-NEXT: call *lifunc
+; LINUX-32-STATIC-NEXT: calll *lifunc
; LINUX-32-STATIC-NEXT: addl $4, %esp
; LINUX-32-STATIC-NEXT: ret
; LINUX-32-PIC: litailcaller:
; LINUX-32-PIC: subl $4, %esp
-; LINUX-32-PIC-NEXT: call *lifunc
+; LINUX-32-PIC-NEXT: calll *lifunc
; LINUX-32-PIC-NEXT: addl $4, %esp
; LINUX-32-PIC-NEXT: ret
@@ -9607,22 +9607,22 @@ entry:
; DARWIN-32-STATIC: _litailcaller:
; DARWIN-32-STATIC: subl $12, %esp
-; DARWIN-32-STATIC-NEXT: call *_lifunc
+; DARWIN-32-STATIC-NEXT: calll *_lifunc
; DARWIN-32-STATIC-NEXT: addl $12, %esp
; DARWIN-32-STATIC-NEXT: ret
; DARWIN-32-DYNAMIC: _litailcaller:
; DARWIN-32-DYNAMIC: subl $12, %esp
-; DARWIN-32-DYNAMIC-NEXT: call *_lifunc
+; DARWIN-32-DYNAMIC-NEXT: calll *_lifunc
; DARWIN-32-DYNAMIC-NEXT: addl $12, %esp
; DARWIN-32-DYNAMIC-NEXT: ret
; DARWIN-32-PIC: _litailcaller:
; DARWIN-32-PIC: subl $12, %esp
-; DARWIN-32-PIC-NEXT: call L147$pb
+; DARWIN-32-PIC-NEXT: calll L147$pb
; DARWIN-32-PIC-NEXT: L147$pb:
; DARWIN-32-PIC-NEXT: popl %eax
-; DARWIN-32-PIC-NEXT: call *_lifunc-L147$pb(%eax)
+; DARWIN-32-PIC-NEXT: calll *_lifunc-L147$pb(%eax)
; DARWIN-32-PIC-NEXT: addl $12, %esp
; DARWIN-32-PIC-NEXT: ret
diff --git a/test/CodeGen/X86/add-of-carry.ll b/test/CodeGen/X86/add-of-carry.ll
new file mode 100644
index 0000000..4c22574
--- /dev/null
+++ b/test/CodeGen/X86/add-of-carry.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8449754>
+
+define i32 @add32carry(i32 %sum, i32 %x) nounwind readnone ssp {
+entry:
+; CHECK: sbbl %ecx, %ecx
+; CHECK-NOT: addl
+; CHECK: subl %ecx, %eax
+ %add4 = add i32 %x, %sum
+ %cmp = icmp ult i32 %add4, %x
+ %inc = zext i1 %cmp to i32
+ %z.0 = add i32 %add4, %inc
+ ret i32 %z.0
+}
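
The IR above is the standard carry-recognition idiom: comparing the sum against one of its addends recovers the carry-out, which is folded back into the result. An illustrative C equivalent (a sketch, not part of the test):

    unsigned add32carry(unsigned sum, unsigned x) {
      unsigned a = x + sum;   /* %add4 */
      return a + (a < x);     /* carry-out of the add, folded back in */
    }

The CHECK lines require this to lower to sbbl/subl, with no separate addl of the zero-extended compare.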
diff --git a/test/CodeGen/X86/alldiv-divdi3.ll b/test/CodeGen/X86/alldiv-divdi3.ll
new file mode 100644
index 0000000..86aa1fd
--- /dev/null
+++ b/test/CodeGen/X86/alldiv-divdi3.ll
@@ -0,0 +1,17 @@
+; Test that, for a 64-bit signed div, a libcall to alldiv is made on Windows
+; unless we have libgcc.
+
+; RUN: llc < %s -mtriple i386-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple i386-pc-cygwin | FileCheck %s -check-prefix USEDIVDI
+; RUN: llc < %s -mtriple i386-pc-mingw32 | FileCheck %s -check-prefix USEDIVDI
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+ %conv4 = sext i32 %argc to i64
+ %div = sdiv i64 84, %conv4
+ %conv7 = trunc i64 %div to i32
+ ret i32 %conv7
+}
+
+; CHECK: alldiv
+; USEDIVDI: divdi3
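
An illustrative C source for the test body (a sketch with assumed types):

    int main(int argc, char **argv) {
      /* 64-bit signed division forces a runtime libcall on x86-32 */
      return (int)(84LL / (long long)argc);
    }

On i386-pc-win32 this should reach the alldiv runtime helper; on cygwin and mingw32, where libgcc is available, divdi3 is expected instead.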
diff --git a/test/CodeGen/X86/andimm8.ll b/test/CodeGen/X86/andimm8.ll
new file mode 100644
index 0000000..640237d
--- /dev/null
+++ b/test/CodeGen/X86/andimm8.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnu -show-mc-encoding | FileCheck %s
+
+; PR8365
+; CHECK: andl $-64, %edi # encoding: [0x83,0xe7,0xc0]
+
+define i64 @bra(i32 %zed) nounwind {
+ %t1 = zext i32 %zed to i64
+ %t2 = and i64 %t1, 4294967232
+ ret i64 %t2
+}
+
+; CHECK: orq $2, %rdi # encoding: [0x48,0x83,0xcf,0x02]
+
+define void @foo(i64 %zed, i64* %x) nounwind {
+ %t1 = and i64 %zed, -4
+ %t2 = or i64 %t1, 2
+ store i64 %t2, i64* %x, align 8
+ ret void
+}
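
Both checks exercise the sign-extended 8-bit immediate form: 4294967232 is 0xFFFFFFC0, which is -64 sign-extended from 8 bits, so the mask fits the imm8 encoding (the 0x83 opcode byte in the expected encoding) rather than a 4-byte immediate. A C-level sketch of the first function (illustrative only):

    unsigned long long bra(unsigned zed) {
      /* 0xFFFFFFC0 == (int)-64, so the backend can use andl $-64 with an imm8 */
      return (unsigned long long)zed & 4294967232ULL;
    }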
diff --git a/test/CodeGen/X86/atomic_op.ll b/test/CodeGen/X86/atomic_op.ll
index 3ef1887..84bae46 100644
--- a/test/CodeGen/X86/atomic_op.ll
+++ b/test/CodeGen/X86/atomic_op.ll
@@ -1,16 +1,8 @@
-; RUN: llc < %s -march=x86 -o %t1
-; RUN: grep "lock" %t1 | count 17
-; RUN: grep "xaddl" %t1 | count 4
-; RUN: grep "cmpxchgl" %t1 | count 13
-; RUN: grep "xchgl" %t1 | count 14
-; RUN: grep "cmova" %t1 | count 2
-; RUN: grep "cmovb" %t1 | count 2
-; RUN: grep "cmovg" %t1 | count 2
-; RUN: grep "cmovl" %t1 | count 2
+; RUN: llc < %s -march=x86 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-define void @main(i32 %argc, i8** %argv) {
+define void @func(i32 %argc, i8** %argv) nounwind {
entry:
%argc.addr = alloca i32 ; <i32*> [#uses=1]
%argv.addr = alloca i8** ; <i8***> [#uses=1]
@@ -29,48 +21,105 @@ entry:
store i32 3855, i32* %ort
store i32 3855, i32* %xort
store i32 4, i32* %temp
- %tmp = load i32* %temp ; <i32> [#uses=1]
+ %tmp = load i32* %temp
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val1, i32 %tmp ) ; <i32>:0 [#uses=1]
store i32 %0, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 30 ) ; <i32>:1 [#uses=1]
store i32 %1, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.add.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:2 [#uses=1]
store i32 %2, i32* %old
+ ; CHECK: lock
+ ; CHECK: xaddl
call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:3 [#uses=1]
store i32 %3, i32* %old
+ ; CHECK: andl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.and.i32.p0i32( i32* %andt, i32 4080 ) ; <i32>:4 [#uses=1]
store i32 %4, i32* %old
+ ; CHECK: orl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.or.i32.p0i32( i32* %ort, i32 4080 ) ; <i32>:5 [#uses=1]
store i32 %5, i32* %old
+ ; CHECK: xorl
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %xort, i32 4080 ) ; <i32>:6 [#uses=1]
store i32 %6, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 16 ) ; <i32>:7 [#uses=1]
store i32 %7, i32* %old
%neg = sub i32 0, 1 ; <i32> [#uses=1]
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.min.i32.p0i32( i32* %val2, i32 %neg ) ; <i32>:8 [#uses=1]
store i32 %8, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:9 [#uses=1]
store i32 %9, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.max.i32.p0i32( i32* %val2, i32 0 ) ; <i32>:10 [#uses=1]
store i32 %10, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 65535 ) ; <i32>:11 [#uses=1]
store i32 %11, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %val2, i32 10 ) ; <i32>:12 [#uses=1]
store i32 %12, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 1 ) ; <i32>:13 [#uses=1]
store i32 %13, i32* %old
+ ; CHECK: cmov
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %val2, i32 10 ) ; <i32>:14 [#uses=1]
store i32 %14, i32* %old
+ ; CHECK: xchgl %{{.*}}, {{.*}}(%esp)
call i32 @llvm.atomic.swap.i32.p0i32( i32* %val2, i32 1976 ) ; <i32>:15 [#uses=1]
store i32 %15, i32* %old
%neg1 = sub i32 0, 10 ; <i32> [#uses=1]
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 %neg1, i32 1 ) ; <i32>:16 [#uses=1]
store i32 %16, i32* %old
+ ; CHECK: lock
+ ; CHECK: cmpxchgl
call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %val2, i32 1976, i32 1 ) ; <i32>:17 [#uses=1]
store i32 %17, i32* %old
ret void
}
+define void @test2(i32 addrspace(256)* nocapture %P) nounwind {
+entry:
+; CHECK: lock
+; CHECK: cmpxchgl %{{.*}}, %gs:(%{{.*}})
+
+ %0 = tail call i32 @llvm.atomic.cmp.swap.i32.p256i32(i32 addrspace(256)* %P, i32 0, i32 1)
+ ret void
+}
+
+declare i32 @llvm.atomic.cmp.swap.i32.p256i32(i32 addrspace(256)* nocapture, i32, i32) nounwind
+
declare i32 @llvm.atomic.load.add.i32.p0i32(i32*, i32) nounwind
declare i32 @llvm.atomic.load.sub.i32.p0i32(i32*, i32) nounwind
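
For orientation, the intrinsics exercised above correspond to the familiar __sync builtins; an illustrative C summary of the expected lowerings, per the CHECK lines (a sketch only; min/max/umin/umax have no __sync equivalents and are omitted):

    void atomics(int *p) {
      __sync_fetch_and_add(p, 4);             /* lock xaddl */
      __sync_fetch_and_sub(p, 30);            /* lock xaddl of the negated operand */
      __sync_fetch_and_and(p, 4080);          /* andl + lock cmpxchgl retry loop */
      __sync_fetch_and_or(p, 4080);           /* orl + lock cmpxchgl retry loop */
      __sync_fetch_and_xor(p, 4080);          /* xorl + lock cmpxchgl retry loop */
      __sync_lock_test_and_set(p, 1976);      /* xchgl, implicitly locked */
      __sync_val_compare_and_swap(p, -10, 1); /* lock cmpxchgl */
    }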
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 04246ea..ec5ed17 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -230,7 +230,7 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
- ; CHECK: vcvttss2si
+ ; CHECK: vcvttsd2si
%res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
@@ -1715,14 +1715,6 @@ define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
-define <8 x i8> @test_x86_ssse3_pabs_b(<8 x i8> %a0) {
- ; CHECK: vpabsb
- %res = call <8 x i8> @llvm.x86.ssse3.pabs.b(<8 x i8> %a0) ; <<8 x i8>> [#uses=1]
- ret <8 x i8> %res
-}
-declare <8 x i8> @llvm.x86.ssse3.pabs.b(<8 x i8>) nounwind readnone
-
-
define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
; CHECK: vpabsb
%res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
@@ -1731,14 +1723,6 @@ define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
-define <2 x i32> @test_x86_ssse3_pabs_d(<2 x i32> %a0) {
- ; CHECK: vpabsd
- %res = call <2 x i32> @llvm.x86.ssse3.pabs.d(<2 x i32> %a0) ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %res
-}
-declare <2 x i32> @llvm.x86.ssse3.pabs.d(<2 x i32>) nounwind readnone
-
-
define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
; CHECK: vpabsd
%res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
@@ -1747,14 +1731,6 @@ define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
-define <4 x i16> @test_x86_ssse3_pabs_w(<4 x i16> %a0) {
- ; CHECK: vpabsw
- %res = call <4 x i16> @llvm.x86.ssse3.pabs.w(<4 x i16> %a0) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %res
-}
-declare <4 x i16> @llvm.x86.ssse3.pabs.w(<4 x i16>) nounwind readnone
-
-
define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
; CHECK: vpabsw
%res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
@@ -1763,14 +1739,6 @@ define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
-define <2 x i32> @test_x86_ssse3_phadd_d(<2 x i32> %a0, <2 x i32> %a1) {
- ; CHECK: vphaddd
- %res = call <2 x i32> @llvm.x86.ssse3.phadd.d(<2 x i32> %a0, <2 x i32> %a1) ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %res
-}
-declare <2 x i32> @llvm.x86.ssse3.phadd.d(<2 x i32>, <2 x i32>) nounwind readnone
-
-
define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vphaddd
%res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -1779,14 +1747,6 @@ define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
-define <4 x i16> @test_x86_ssse3_phadd_sw(<4 x i16> %a0, <4 x i16> %a1) {
- ; CHECK: vphaddsw
- %res = call <4 x i16> @llvm.x86.ssse3.phadd.sw(<4 x i16> %a0, <4 x i16> %a1) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %res
-}
-declare <4 x i16> @llvm.x86.ssse3.phadd.sw(<4 x i16>, <4 x i16>) nounwind readnone
-
-
define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vphaddsw
%res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -1795,14 +1755,6 @@ define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone
-define <4 x i16> @test_x86_ssse3_phadd_w(<4 x i16> %a0, <4 x i16> %a1) {
- ; CHECK: vphaddw
- %res = call <4 x i16> @llvm.x86.ssse3.phadd.w(<4 x i16> %a0, <4 x i16> %a1) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %res
-}
-declare <4 x i16> @llvm.x86.ssse3.phadd.w(<4 x i16>, <4 x i16>) nounwind readnone
-
-
define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vphaddw
%res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -1811,14 +1763,6 @@ define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
-define <2 x i32> @test_x86_ssse3_phsub_d(<2 x i32> %a0, <2 x i32> %a1) {
- ; CHECK: vphsubd
- %res = call <2 x i32> @llvm.x86.ssse3.phsub.d(<2 x i32> %a0, <2 x i32> %a1) ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %res
-}
-declare <2 x i32> @llvm.x86.ssse3.phsub.d(<2 x i32>, <2 x i32>) nounwind readnone
-
-
define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vphsubd
%res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -1827,14 +1771,6 @@ define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
-define <4 x i16> @test_x86_ssse3_phsub_sw(<4 x i16> %a0, <4 x i16> %a1) {
- ; CHECK: vphsubsw
- %res = call <4 x i16> @llvm.x86.ssse3.phsub.sw(<4 x i16> %a0, <4 x i16> %a1) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %res
-}
-declare <4 x i16> @llvm.x86.ssse3.phsub.sw(<4 x i16>, <4 x i16>) nounwind readnone
-
-
define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vphsubsw
%res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -1843,14 +1779,6 @@ define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
-define <4 x i16> @test_x86_ssse3_phsub_w(<4 x i16> %a0, <4 x i16> %a1) {
- ; CHECK: vphsubw
- %res = call <4 x i16> @llvm.x86.ssse3.phsub.w(<4 x i16> %a0, <4 x i16> %a1) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %res
-}
-declare <4 x i16> @llvm.x86.ssse3.phsub.w(<4 x i16>, <4 x i16>) nounwind readnone
-
-
define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vphsubw
%res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -1859,14 +1787,6 @@ define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
-define <4 x i16> @test_x86_ssse3_pmadd_ub_sw(<4 x i16> %a0, <4 x i16> %a1) {
- ; CHECK: vpmaddubsw
- %res = call <4 x i16> @llvm.x86.ssse3.pmadd.ub.sw(<4 x i16> %a0, <4 x i16> %a1) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %res
-}
-declare <4 x i16> @llvm.x86.ssse3.pmadd.ub.sw(<4 x i16>, <4 x i16>) nounwind readnone
-
-
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpmaddubsw
%res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -1875,14 +1795,6 @@ define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
-define <4 x i16> @test_x86_ssse3_pmul_hr_sw(<4 x i16> %a0, <4 x i16> %a1) {
- ; CHECK: vpmulhrsw
- %res = call <4 x i16> @llvm.x86.ssse3.pmul.hr.sw(<4 x i16> %a0, <4 x i16> %a1) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %res
-}
-declare <4 x i16> @llvm.x86.ssse3.pmul.hr.sw(<4 x i16>, <4 x i16>) nounwind readnone
-
-
define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpmulhrsw
%res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
@@ -1891,14 +1803,6 @@ define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
-define <8 x i8> @test_x86_ssse3_pshuf_b(<8 x i8> %a0, <8 x i8> %a1) {
- ; CHECK: vpshufb
- %res = call <8 x i8> @llvm.x86.ssse3.pshuf.b(<8 x i8> %a0, <8 x i8> %a1) ; <<8 x i8>> [#uses=1]
- ret <8 x i8> %res
-}
-declare <8 x i8> @llvm.x86.ssse3.pshuf.b(<8 x i8>, <8 x i8>) nounwind readnone
-
-
define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpshufb
%res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -1907,14 +1811,6 @@ define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
-define <8 x i8> @test_x86_ssse3_psign_b(<8 x i8> %a0, <8 x i8> %a1) {
- ; CHECK: vpsignb
- %res = call <8 x i8> @llvm.x86.ssse3.psign.b(<8 x i8> %a0, <8 x i8> %a1) ; <<8 x i8>> [#uses=1]
- ret <8 x i8> %res
-}
-declare <8 x i8> @llvm.x86.ssse3.psign.b(<8 x i8>, <8 x i8>) nounwind readnone
-
-
define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpsignb
%res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
@@ -1923,14 +1819,6 @@ define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
-define <2 x i32> @test_x86_ssse3_psign_d(<2 x i32> %a0, <2 x i32> %a1) {
- ; CHECK: vpsignd
- %res = call <2 x i32> @llvm.x86.ssse3.psign.d(<2 x i32> %a0, <2 x i32> %a1) ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %res
-}
-declare <2 x i32> @llvm.x86.ssse3.psign.d(<2 x i32>, <2 x i32>) nounwind readnone
-
-
define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK: vpsignd
%res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
@@ -1939,14 +1827,6 @@ define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
-define <4 x i16> @test_x86_ssse3_psign_w(<4 x i16> %a0, <4 x i16> %a1) {
- ; CHECK: vpsignw
- %res = call <4 x i16> @llvm.x86.ssse3.psign.w(<4 x i16> %a0, <4 x i16> %a1) ; <<4 x i16>> [#uses=1]
- ret <4 x i16> %res
-}
-declare <4 x i16> @llvm.x86.ssse3.psign.w(<4 x i16>, <4 x i16>) nounwind readnone
-
-
define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpsignw
%res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
index b186710..5a466fc 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
@@ -17,7 +17,7 @@ declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readn
define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
- ; CHECK: vcvttss2si
+ ; CHECK: vcvttsd2si
%res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
ret i64 %res
}
diff --git a/test/CodeGen/X86/bit-test-shift.ll b/test/CodeGen/X86/bit-test-shift.ll
new file mode 100644
index 0000000..7497613
--- /dev/null
+++ b/test/CodeGen/X86/bit-test-shift.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; <rdar://problem/8285015>
+
+define i32 @x(i32 %t) nounwind readnone ssp {
+entry:
+; CHECK: shll $23, %eax
+; CHECK: sarl $31, %eax
+; CHECK: andl $-26, %eax
+ %and = and i32 %t, 256
+ %tobool = icmp eq i32 %and, 0
+ %retval.0 = select i1 %tobool, i32 0, i32 -26
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/bswap-inline-asm.ll b/test/CodeGen/X86/bswap-inline-asm.ll
index 2b70193..ecb4cec 100644
--- a/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/test/CodeGen/X86/bswap-inline-asm.ll
@@ -65,6 +65,13 @@ define i32 @t32(i32 %x) nounwind {
ret i32 %asmtmp
}
+; CHECK: u32:
+; CHECK: bswapl
+define i32 @u32(i32 %x) nounwind {
+ %asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
+ ret i32 %asmtmp
+}
+
; CHECK: s64:
; CHECK: bswapq
define i64 @s64(i64 %x) nounwind {
diff --git a/test/CodeGen/X86/byval.ll b/test/CodeGen/X86/byval.ll
index af36e1b..ac0bc09 100644
--- a/test/CodeGen/X86/byval.ll
+++ b/test/CodeGen/X86/byval.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=x86-64 | grep {movq 8(%rsp), %rax}
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep {movl 8(%esp), %edx} %t
-; RUN: grep {movl 4(%esp), %eax} %t
+; RUN: llc < %s -march=x86-64 | FileCheck -check-prefix=X86-64 %s
+; RUN: llc < %s -march=x86 | FileCheck -check-prefix=X86 %s
+
+; X86: movl 4(%esp), %eax
+; X86: movl 8(%esp), %edx
+
+; X86-64: movq 8(%rsp), %rax
%struct.s = type { i64, i64, i64 }
diff --git a/test/CodeGen/X86/cmp-test.ll b/test/CodeGen/X86/cmp-test.ll
deleted file mode 100644
index 898c09b..0000000
--- a/test/CodeGen/X86/cmp-test.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -march=x86 | grep cmp | count 1
-; RUN: llc < %s -march=x86 | grep test | count 1
-
-define i32 @f1(i32 %X, i32* %y) {
- %tmp = load i32* %y ; <i32> [#uses=1]
- %tmp.upgrd.1 = icmp eq i32 %tmp, 0 ; <i1> [#uses=1]
- br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
-
-cond_true: ; preds = %0
- ret i32 1
-
-ReturnBlock: ; preds = %0
- ret i32 0
-}
-
-define i32 @f2(i32 %X, i32* %y) {
- %tmp = load i32* %y ; <i32> [#uses=1]
- %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
- %tmp1.upgrd.2 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
- br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
-
-cond_true: ; preds = %0
- ret i32 1
-
-ReturnBlock: ; preds = %0
- ret i32 0
-}
diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll
new file mode 100644
index 0000000..ef5e353
--- /dev/null
+++ b/test/CodeGen/X86/cmp.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -show-mc-encoding | FileCheck %s
+
+define i32 @test1(i32 %X, i32* %y) nounwind {
+ %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp.upgrd.1 = icmp eq i32 %tmp, 0 ; <i1> [#uses=1]
+ br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
+
+cond_true: ; preds = %0
+ ret i32 1
+
+ReturnBlock: ; preds = %0
+ ret i32 0
+; CHECK: test1:
+; CHECK: cmpl $0, (%rsi)
+}
+
+define i32 @test2(i32 %X, i32* %y) nounwind {
+ %tmp = load i32* %y ; <i32> [#uses=1]
+ %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
+ %tmp1.upgrd.2 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1]
+ br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
+
+cond_true: ; preds = %0
+ ret i32 1
+
+ReturnBlock: ; preds = %0
+ ret i32 0
+; CHECK: test2:
+; CHECK: movl (%rsi), %eax
+; CHECK: shll $3, %eax
+; CHECK: testl %eax, %eax
+}
+
+define i64 @test3(i64 %x) nounwind {
+ %t = icmp eq i64 %x, 0
+ %r = zext i1 %t to i64
+ ret i64 %r
+; CHECK: test3:
+; CHECK: testq %rdi, %rdi
+; CHECK: sete %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
+}
+
+define i64 @test4(i64 %x) nounwind {
+ %t = icmp slt i64 %x, 1
+ %r = zext i1 %t to i64
+ ret i64 %r
+; CHECK: test4:
+; CHECK: testq %rdi, %rdi
+; CHECK: setle %al
+; CHECK: movzbl %al, %eax
+; CHECK: ret
+}
+
+
+define i32 @test5(double %A) nounwind {
+ entry:
+ %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
+ %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
+ %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
+ br i1 %bothcond, label %bb8, label %bb12
+
+ bb8:; preds = %entry
+ %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
+ ret i32 %tmp9
+
+ bb12:; preds = %entry
+ ret i32 32
+; CHECK: test5:
+; CHECK: ucomisd LCPI4_0(%rip), %xmm0
+; CHECK: ucomisd LCPI4_1(%rip), %xmm0
+}
+
+declare i32 @foo(...)
+
+define i32 @test6() nounwind align 2 {
+ %A = alloca {i64, i64}, align 8
+ %B = getelementptr inbounds {i64, i64}* %A, i64 0, i32 1
+ %C = load i64* %B
+ %D = icmp eq i64 %C, 0
+ br i1 %D, label %T, label %F
+T:
+ ret i32 1
+
+F:
+ ret i32 0
+; CHECK: test6:
+; CHECK: cmpq $0, -8(%rsp)
+; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
+}
+
diff --git a/test/CodeGen/X86/cmp0.ll b/test/CodeGen/X86/cmp0.ll
deleted file mode 100644
index 48784488..0000000
--- a/test/CodeGen/X86/cmp0.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-define i64 @test0(i64 %x) nounwind {
- %t = icmp eq i64 %x, 0
- %r = zext i1 %t to i64
- ret i64 %r
-; CHECK: test0:
-; CHECK: testq %rdi, %rdi
-; CHECK: sete %al
-; CHECK: movzbl %al, %eax
-; CHECK: ret
-}
-
-define i64 @test1(i64 %x) nounwind {
- %t = icmp slt i64 %x, 1
- %r = zext i1 %t to i64
- ret i64 %r
-; CHECK: test1:
-; CHECK: testq %rdi, %rdi
-; CHECK: setle %al
-; CHECK: movzbl %al, %eax
-; CHECK: ret
-}
-
diff --git a/test/CodeGen/X86/cmp2.ll b/test/CodeGen/X86/cmp2.ll
deleted file mode 100644
index 9a8e00c..0000000
--- a/test/CodeGen/X86/cmp2.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep ucomisd | grep CPI | count 2
-
-define i32 @test(double %A) nounwind {
- entry:
- %tmp2 = fcmp ogt double %A, 1.500000e+02; <i1> [#uses=1]
- %tmp5 = fcmp ult double %A, 7.500000e+01; <i1> [#uses=1]
- %bothcond = or i1 %tmp2, %tmp5; <i1> [#uses=1]
- br i1 %bothcond, label %bb8, label %bb12
-
- bb8:; preds = %entry
- %tmp9 = tail call i32 (...)* @foo( ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp9
-
- bb12:; preds = %entry
- ret i32 32
-}
-
-declare i32 @foo(...)
diff --git a/test/CodeGen/X86/compare-inf.ll b/test/CodeGen/X86/compare-inf.ll
index 2be90c9..9aa44a3 100644
--- a/test/CodeGen/X86/compare-inf.ll
+++ b/test/CodeGen/X86/compare-inf.ll
@@ -5,7 +5,7 @@
; CHECK: oeq_inff:
; CHECK: ucomiss
-; CHECK: jae
+; CHECK: jb
define float @oeq_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp oeq float %x, 0x7FF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -14,7 +14,7 @@ define float @oeq_inff(float %x, float %y) nounwind readonly {
; CHECK: oeq_inf:
; CHECK: ucomisd
-; CHECK: jae
+; CHECK: jb
define double @oeq_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp oeq double %x, 0x7FF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -23,7 +23,7 @@ define double @oeq_inf(double %x, double %y) nounwind readonly {
; CHECK: une_inff:
; CHECK: ucomiss
-; CHECK: jb
+; CHECK: jae
define float @une_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp une float %x, 0x7FF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -32,7 +32,7 @@ define float @une_inff(float %x, float %y) nounwind readonly {
; CHECK: une_inf:
; CHECK: ucomisd
-; CHECK: jb
+; CHECK: jae
define double @une_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp une double %x, 0x7FF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -41,7 +41,7 @@ define double @une_inf(double %x, double %y) nounwind readonly {
; CHECK: oeq_neg_inff:
; CHECK: ucomiss
-; CHECK: jae
+; CHECK: jb
define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp oeq float %x, 0xFFF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -50,7 +50,7 @@ define float @oeq_neg_inff(float %x, float %y) nounwind readonly {
; CHECK: oeq_neg_inf:
; CHECK: ucomisd
-; CHECK: jae
+; CHECK: jb
define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp oeq double %x, 0xFFF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
@@ -59,7 +59,7 @@ define double @oeq_neg_inf(double %x, double %y) nounwind readonly {
; CHECK: une_neg_inff:
; CHECK: ucomiss
-; CHECK: jb
+; CHECK: jae
define float @une_neg_inff(float %x, float %y) nounwind readonly {
%t0 = fcmp une float %x, 0xFFF0000000000000
%t1 = select i1 %t0, float 1.0, float %y
@@ -68,7 +68,7 @@ define float @une_neg_inff(float %x, float %y) nounwind readonly {
; CHECK: une_neg_inf:
; CHECK: ucomisd
-; CHECK: jb
+; CHECK: jae
define double @une_neg_inf(double %x, double %y) nounwind readonly {
%t0 = fcmp une double %x, 0xFFF0000000000000
%t1 = select i1 %t0, double 1.0, double %y
diff --git a/test/CodeGen/X86/complex-asm.ll b/test/CodeGen/X86/complex-asm.ll
new file mode 100644
index 0000000..49878b9
--- /dev/null
+++ b/test/CodeGen/X86/complex-asm.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin
+; This formerly crashed.
+
+%0 = type { i64, i64 }
+
+define %0 @f() nounwind ssp {
+entry:
+ %v = alloca %0, align 8
+ call void asm sideeffect "", "=*r,r,r,0,~{dirflag},~{fpsr},~{flags}"(%0* %v, i32 0, i32 1, i128 undef) nounwind
+ %0 = getelementptr inbounds %0* %v, i64 0, i32 0
+ %1 = load i64* %0, align 8
+ %2 = getelementptr inbounds %0* %v, i64 0, i32 1
+ %3 = load i64* %2, align 8
+ %mrv4 = insertvalue %0 undef, i64 %1, 0
+ %mrv5 = insertvalue %0 %mrv4, i64 %3, 1
+ ret %0 %mrv5
+}
diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll
index c634c7e..9136175 100644
--- a/test/CodeGen/X86/dll-linkage.ll
+++ b/test/CodeGen/X86/dll-linkage.ll
@@ -3,7 +3,7 @@
declare dllimport void @foo()
define void @bar() nounwind {
-; CHECK: call *__imp__foo
+; CHECK: calll *__imp__foo
call void @foo()
ret void
}
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index bdbaac0..bf57e78 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s | FileCheck %s
; PR2936
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
define dllexport x86_fastcallcc i32 @foo() nounwind {
entry:
diff --git a/test/CodeGen/X86/dollar-name.ll b/test/CodeGen/X86/dollar-name.ll
index 3b26319..2ecd729 100644
--- a/test/CodeGen/X86/dollar-name.ll
+++ b/test/CodeGen/X86/dollar-name.ll
@@ -7,7 +7,7 @@
define i32 @"$foo"() nounwind {
; CHECK: movl ($bar),
; CHECK: addl ($qux),
-; CHECK: call ($hen)
+; CHECK: calll ($hen)
%m = load i32* @"$bar"
%n = load i32* @"$qux"
%t = add i32 %m, %n
diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll
index 1df0920..e577611 100644
--- a/test/CodeGen/X86/dyn-stackalloc.ll
+++ b/test/CodeGen/X86/dyn-stackalloc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7}
-; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16}
-; RUN: llc < %s -march=x86-64 | grep {\\-16}
+; RUN: llc < %s -mtriple=i686-linux | not egrep {\\\$4294967289|-7}
+; RUN: llc < %s -mtriple=i686-linux | egrep {\\\$4294967280|-16}
+; RUN: llc < %s -mtriple=x86_64-linux | grep {\\-16}
define void @t() nounwind {
A:
diff --git a/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
new file mode 100644
index 0000000..9233d3f
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -0,0 +1,23 @@
+; RUN: llc -O0 -relocation-model=pic < %s | not grep call
+; rdar://8396318
+
+; Don't emit a PIC base register if no addresses are needed.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin11.0.0"
+
+define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind ssp {
+entry:
+ %x.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ %z.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ store i32 %z, i32* %z.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %tmp1 = load i32* %y.addr, align 4
+ %add = add nsw i32 %tmp, %tmp1
+ %tmp2 = load i32* %z.addr, align 4
+ %add3 = add nsw i32 %add, %tmp2
+ ret i32 %add3
+}
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 8d7dc8f..4abc3b5 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,19 +1,23 @@
-; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx | FileCheck %s
+; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
; PR4684
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.8"
-declare void @func2(<1 x i64>)
+declare void @func2(x86_mmx)
define void @func1() nounwind {
; This isn't spectacular, but it's MMX code at -O0...
-; CHECK: movl $2, %eax
-; CHECK: movd %rax, %mm0
-; CHECK: movd %mm0, %rdi
+; CHECK: movq2dq %mm0, %xmm0
+; For now, handling of x86_mmx parameters in fast isel is unimplemented,
+; so we get pretty poor code. The disabled CHEK lines below are preferable.
+; CHEK: movl $2, %eax
+; CHEK: movd %rax, %mm0
+; CHEK: movd %mm0, %rdi
- call void @func2(<1 x i64> <i64 2>)
+ %tmp0 = bitcast <2 x i32><i32 0, i32 2> to x86_mmx
+ call void @func2(x86_mmx %tmp0)
ret void
}
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index 35ec1e7..8db1936 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | \
-; RUN: grep lazy_ptr, | count 2
-; RUN: llc < %s -fast-isel -march=x86 -relocation-model=static | \
-; RUN: grep lea
+; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s
@src = external global i32
+; rdar://6653118
define i32 @loadgv() nounwind {
entry:
%0 = load i32* @src, align 4
@@ -12,6 +10,14 @@ entry:
%2 = add i32 %0, %1
store i32 %2, i32* @src
ret i32 %2
+; This should fold one of the loads into the add.
+; CHECK: loadgv:
+; CHECK: movl L_src$non_lazy_ptr, %ecx
+; CHECK: movl (%ecx), %eax
+; CHECK: addl (%ecx), %eax
+; CHECK: movl %eax, (%ecx)
+; CHECK: ret
+
}
%stuff = type { i32 (...)** }
@@ -21,4 +27,8 @@ define void @t(%stuff* %this) nounwind {
entry:
store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
ret void
+; CHECK: _t:
+; CHECK: movl $0, %eax
+; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
+
}
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
new file mode 100644
index 0000000..2ffcb96
--- /dev/null
+++ b/test/CodeGen/X86/fltused.ll
@@ -0,0 +1,19 @@
+; The purpose of this test is to verify that the fltused symbol is emitted when
+; any function is called with floating point arguments on Windows, and that it
+; is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
diff --git a/test/CodeGen/X86/fp-stack-compare.ll b/test/CodeGen/X86/fp-stack-compare.ll
index 4bdf459..b216914 100644
--- a/test/CodeGen/X86/fp-stack-compare.ll
+++ b/test/CodeGen/X86/fp-stack-compare.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=i386 | \
-; RUN: grep {fucomi.*st.\[12\]}
+; RUN: llc < %s -march=x86 -mcpu=i386 | grep {fucompi.*st.\[12\]}
; PR1012
define float @foo(float* %col.2.0) {
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
index 9393cf5..0e65cfd 100644
--- a/test/CodeGen/X86/ghc-cc.ll
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -11,9 +11,9 @@ define void @zap(i32 %a, i32 %b) nounwind {
entry:
; CHECK: movl {{[0-9]*}}(%esp), %ebx
; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
- ; CHECK-NEXT: call addtwo
+ ; CHECK-NEXT: calll addtwo
%0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
- ; CHECK: call foo
+ ; CHECK: calll foo
call void @foo() nounwind
ret void
}
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index 2d8f429..6d21191 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -20,7 +20,7 @@
; TODO: linux drops this into .rodata, we drop it into ".gnu.linkonce.r.G2"
-; DARWIN: .section __TEXT,__const_coal,coalesced,pure_instructions
+; DARWIN: .section __TEXT,__const_coal,coalesced
; DARWIN: _G2:
; DARWIN: .long 42
@@ -85,7 +85,7 @@
; LINUX: .byte 1
; LINUX: .size G6, 1
-; DARWIN: .section __TEXT,__const_coal,coalesced,pure_instructions
+; DARWIN: .section __TEXT,__const_coal,coalesced
; DARWIN: .globl _G6
; DARWIN: .weak_definition _G6
; DARWIN:_G6:
diff --git a/test/CodeGen/X86/legalizedag_vec.ll b/test/CodeGen/X86/legalizedag_vec.ll
index 574b46a..028627d 100644
--- a/test/CodeGen/X86/legalizedag_vec.ll
+++ b/test/CodeGen/X86/legalizedag_vec.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx -o %t
-; RUN: grep {call.*divdi3} %t | count 2
+; RUN: llc < %s -march=x86 -mattr=sse2 -disable-mmx | FileCheck %s
; Test case for r63760 where we generate a legalization assert that an illegal
@@ -12,4 +11,7 @@
define <2 x i64> @test_long_div(<2 x i64> %num, <2 x i64> %div) {
%div.r = sdiv <2 x i64> %num, %div
ret <2 x i64> %div.r
-}
+}
+
+; CHECK: call{{.*(divdi3|alldiv)}}
+; CHECK: call{{.*(divdi3|alldiv)}}
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 6c0eb8c..6556fde 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
-; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
; By starting the IV at -64 instead of 0, a cmp is eliminated,
; as the flags from the add can be used directly.
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index d2ff58b..2a97629 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -353,11 +353,11 @@ return:
; CHECK: count_me_3:
; CHECK: call
-; CHECK: movsd (%r15,%r13,8), %xmm0
-; CHECK: mulsd (%r14,%r13,8), %xmm0
-; CHECK: movsd %xmm0, (%r12,%r13,8)
-; CHECK: incq %r13
-; CHECK: cmpq %r13, %rbx
+; CHECK: movsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
+; CHECK: mulsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
+; CHECK: movsd %xmm0, (%r{{[^,]*}},%r{{[^,]*}},8)
+; CHECK: incq %r{{.*}}
+; CHECK: cmpq %r{{.*}}, %r{{.*}}
; CHECK: jne
declare void @use(i64)
@@ -389,7 +389,7 @@ return:
; rdar://7657764
; CHECK: asd:
-; CHECK: BB9_5:
+; CHECK: BB9_4:
; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}}
@@ -464,7 +464,7 @@ bb5: ; preds = %bb3, %entry
; And the one at %bb68, where we want to be sure to use superhero mode:
-; CHECK: BB10_9:
+; CHECK: BB10_7:
; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
@@ -484,7 +484,6 @@ bb5: ; preds = %bb3, %entry
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $-16, %r{{.*}}
-; CHECK-NEXT: BB10_10:
; CHECK-NEXT: cmpq $15, %r{{.*}}
; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll
index ec8db50..d605e4f 100644
--- a/test/CodeGen/X86/lsr-wrap.ll
+++ b/test/CodeGen/X86/lsr-wrap.ll
@@ -3,7 +3,7 @@
; LSR would like to use a single IV for both of these, however it's
; not safe due to wraparound.
-; CHECK: addb $-4, %r
+; CHECK: addb $-4, %
; CHECK: decw %
@g_19 = common global i32 0 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll
index b90d2e2..36be1f3 100644
--- a/test/CodeGen/X86/memcmp.ll
+++ b/test/CodeGen/X86/memcmp.ll
@@ -20,8 +20,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp2:
-; CHECK: movw (%rsi), %ax
-; CHECK: cmpw %ax, (%rdi)
+; CHECK: movw (%rdi), %ax
+; CHECK: cmpw (%rsi), %ax
}
define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
@@ -54,8 +54,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp4:
-; CHECK: movl (%rsi), %eax
-; CHECK: cmpl %eax, (%rdi)
+; CHECK: movl (%rdi), %eax
+; CHECK: cmpl (%rsi), %eax
}
define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind {
@@ -87,8 +87,8 @@ bb: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: memcmp8:
-; CHECK: movq (%rsi), %rax
-; CHECK: cmpq %rax, (%rdi)
+; CHECK: movq (%rdi), %rax
+; CHECK: cmpq (%rsi), %rax
}
define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind {
diff --git a/test/CodeGen/X86/memmove-0.ll b/test/CodeGen/X86/memmove-0.ll
deleted file mode 100644
index d405068..0000000
--- a/test/CodeGen/X86/memmove-0.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memcpy}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* noalias %d, i8* noalias %s, i64 %l)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-1.ll b/test/CodeGen/X86/memmove-1.ll
deleted file mode 100644
index 2057be8..0000000
--- a/test/CodeGen/X86/memmove-1.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* %d, i8* %s, i64 %l)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-2.ll b/test/CodeGen/X86/memmove-2.ll
deleted file mode 100644
index 68a9f4d..0000000
--- a/test/CodeGen/X86/memmove-2.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | not grep call
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* noalias %d, i8* noalias %s)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memmove-3.ll b/test/CodeGen/X86/memmove-3.ll
deleted file mode 100644
index d8a419c..0000000
--- a/test/CodeGen/X86/memmove-3.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep {call memmove}
-
-declare void @llvm.memmove.i64(i8* %d, i8* %s, i64 %l, i32 %a)
-
-define void @foo(i8* %d, i8* %s)
-{
- call void @llvm.memmove.i64(i8* %d, i8* %s, i64 32, i32 1)
- ret void
-}
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 0e15595..692965f 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -5,7 +5,7 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
define fastcc void @t1() nounwind {
entry:
; CHECK: t1:
-; CHECK: call _memset
+; CHECK: calll _memset
call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
unreachable
}
@@ -13,7 +13,7 @@ entry:
define fastcc void @t2(i8 signext %c) nounwind {
entry:
; CHECK: t2:
-; CHECK: call _memset
+; CHECK: calll _memset
call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
unreachable
}
diff --git a/test/CodeGen/X86/mingw-alloca.ll b/test/CodeGen/X86/mingw-alloca.ll
index 7dcd84d..ded4b73 100644
--- a/test/CodeGen/X86/mingw-alloca.ll
+++ b/test/CodeGen/X86/mingw-alloca.ll
@@ -6,7 +6,7 @@ target triple = "i386-pc-mingw32"
define void @foo1(i32 %N) nounwind {
entry:
; CHECK: _foo1:
-; CHECK: call __alloca
+; CHECK: calll __alloca
%tmp14 = alloca i32, i32 %N ; <i32*> [#uses=1]
call void @bar1( i32* %tmp14 )
ret void
@@ -19,7 +19,7 @@ entry:
; CHECK: _foo2:
; CHECK: andl $-16, %esp
; CHECK: pushl %eax
-; CHECK: call __alloca
+; CHECK: calll __alloca
; CHECK: movl 8028(%esp), %eax
%A2 = alloca [2000 x i32], align 16 ; <[2000 x i32]*> [#uses=1]
%A2.sub = getelementptr [2000 x i32]* %A2, i32 0, i32 0 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/X86/mmx-arg-passing.ll b/test/CodeGen/X86/mmx-arg-passing.ll
index 426e98e..b348512 100644
--- a/test/CodeGen/X86/mmx-arg-passing.ll
+++ b/test/CodeGen/X86/mmx-arg-passing.ll
@@ -1,24 +1,27 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 3
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep mm0 | count 1
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+mmx | grep esp | count 2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep xmm0
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep rdi
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | not grep movups
;
; On Darwin x86-32, v8i8, v4i16, v2i32 values are passed in MM[0-2].
-; On Darwin x86-32, v1i64 values are passed in memory.
+; On Darwin x86-32, v1i64 values are passed in memory. In this example, they
+; are never moved into an MM register at all.
; On Darwin x86-64, v8i8, v4i16, v2i32 values are passed in XMM[0-7].
; On Darwin x86-64, v1i64 values are passed in 64-bit GPRs.
-@u1 = external global <8 x i8>
+@u1 = external global x86_mmx
-define void @t1(<8 x i8> %v1) nounwind {
- store <8 x i8> %v1, <8 x i8>* @u1, align 8
+define void @t1(x86_mmx %v1) nounwind {
+ store x86_mmx %v1, x86_mmx* @u1, align 8
ret void
}
-@u2 = external global <1 x i64>
+@u2 = external global x86_mmx
define void @t2(<1 x i64> %v1) nounwind {
- store <1 x i64> %v1, <1 x i64>* @u2, align 8
+ %tmp = bitcast <1 x i64> %v1 to x86_mmx
+ store x86_mmx %tmp, x86_mmx* @u2, align 8
ret void
}
+
diff --git a/test/CodeGen/X86/mmx-arg-passing2.ll b/test/CodeGen/X86/mmx-arg-passing2.ll
index c42af08..c132d31 100644
--- a/test/CodeGen/X86/mmx-arg-passing2.ll
+++ b/test/CodeGen/X86/mmx-arg-passing2.ll
@@ -1,17 +1,21 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movq2dq | count 1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | grep movdq2q | count 2
+; Since the add is not an MMX add, we don't have a movq2dq any more.
@g_v8qi = external global <8 x i8>
define void @t1() nounwind {
%tmp3 = load <8 x i8>* @g_v8qi, align 8
- %tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+ %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+ %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
-define void @t2(<8 x i8> %v1, <8 x i8> %v2) nounwind {
- %tmp3 = add <8 x i8> %v1, %v2
- %tmp4 = tail call i32 (...)* @pass_v8qi( <8 x i8> %tmp3 ) nounwind
+define void @t2(x86_mmx %v1, x86_mmx %v2) nounwind {
+ %v1a = bitcast x86_mmx %v1 to <8 x i8>
+ %v2b = bitcast x86_mmx %v2 to <8 x i8>
+ %tmp3 = add <8 x i8> %v1a, %v2b
+ %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
+ %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
ret void
}
diff --git a/test/CodeGen/X86/mmx-arith.ll b/test/CodeGen/X86/mmx-arith.ll
index e4dfdbf..6817487 100644
--- a/test/CodeGen/X86/mmx-arith.ll
+++ b/test/CodeGen/X86/mmx-arith.ll
@@ -1,131 +1,309 @@
; RUN: llc < %s -march=x86 -mattr=+mmx
;; A basic sanity check to make sure that MMX arithmetic actually compiles.
+;; First is a straight translation of the original with bitcasts as needed.
-define void @foo(<8 x i8>* %A, <8 x i8>* %B) {
+define void @foo(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <8 x i8>* %A ; <<8 x i8>> [#uses=1]
- %tmp3 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp4 = add <8 x i8> %tmp1, %tmp3 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp4, <8 x i8>* %A
- %tmp7 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp12 = tail call <8 x i8> @llvm.x86.mmx.padds.b( <8 x i8> %tmp4, <8 x i8> %tmp7 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp12, <8 x i8>* %A
- %tmp16 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp21 = tail call <8 x i8> @llvm.x86.mmx.paddus.b( <8 x i8> %tmp12, <8 x i8> %tmp16 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp21, <8 x i8>* %A
- %tmp27 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp28 = sub <8 x i8> %tmp21, %tmp27 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp28, <8 x i8>* %A
- %tmp31 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp36 = tail call <8 x i8> @llvm.x86.mmx.psubs.b( <8 x i8> %tmp28, <8 x i8> %tmp31 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp36, <8 x i8>* %A
- %tmp40 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp45 = tail call <8 x i8> @llvm.x86.mmx.psubus.b( <8 x i8> %tmp36, <8 x i8> %tmp40 ) ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp45, <8 x i8>* %A
- %tmp51 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp52 = mul <8 x i8> %tmp45, %tmp51 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp52, <8 x i8>* %A
- %tmp57 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp58 = and <8 x i8> %tmp52, %tmp57 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp58, <8 x i8>* %A
- %tmp63 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp64 = or <8 x i8> %tmp58, %tmp63 ; <<8 x i8>> [#uses=2]
- store <8 x i8> %tmp64, <8 x i8>* %A
- %tmp69 = load <8 x i8>* %B ; <<8 x i8>> [#uses=1]
- %tmp70 = xor <8 x i8> %tmp64, %tmp69 ; <<8 x i8>> [#uses=1]
- store <8 x i8> %tmp70, <8 x i8>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
+ %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
+ %tmp4 = add <8 x i8> %tmp1a, %tmp3a ; <<8 x i8>> [#uses=2]
+ %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
+ %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
+ %tmp28 = sub <8 x i8> %tmp21a, %tmp27a ; <<8 x i8>> [#uses=2]
+ %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
+ %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
+ %tmp52 = mul <8 x i8> %tmp45a, %tmp51a ; <<8 x i8>> [#uses=2]
+ %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
+ store x86_mmx %tmp52a, x86_mmx* %A
+ %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
+ %tmp58 = and <8 x i8> %tmp52, %tmp57a ; <<8 x i8>> [#uses=2]
+ %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
+ store x86_mmx %tmp58a, x86_mmx* %A
+ %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
+ %tmp64 = or <8 x i8> %tmp58, %tmp63a ; <<8 x i8>> [#uses=2]
+ %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
+ store x86_mmx %tmp64a, x86_mmx* %A
+ %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
+ %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
+ %tmp70 = xor <8 x i8> %tmp64b, %tmp69a ; <<8 x i8>> [#uses=1]
+ %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
+ store x86_mmx %tmp70a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-define void @baz(<2 x i32>* %A, <2 x i32>* %B) {
+define void @baz(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <2 x i32>* %A ; <<2 x i32>> [#uses=1]
- %tmp3 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp4 = add <2 x i32> %tmp1, %tmp3 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp4, <2 x i32>* %A
- %tmp9 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp10 = sub <2 x i32> %tmp4, %tmp9 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp10, <2 x i32>* %A
- %tmp15 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp16 = mul <2 x i32> %tmp10, %tmp15 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp16, <2 x i32>* %A
- %tmp21 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp22 = and <2 x i32> %tmp16, %tmp21 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp22, <2 x i32>* %A
- %tmp27 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp28 = or <2 x i32> %tmp22, %tmp27 ; <<2 x i32>> [#uses=2]
- store <2 x i32> %tmp28, <2 x i32>* %A
- %tmp33 = load <2 x i32>* %B ; <<2 x i32>> [#uses=1]
- %tmp34 = xor <2 x i32> %tmp28, %tmp33 ; <<2 x i32>> [#uses=1]
- store <2 x i32> %tmp34, <2 x i32>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
+ %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
+ %tmp4 = add <2 x i32> %tmp1a, %tmp3a ; <<2 x i32>> [#uses=2]
+ %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
+ %tmp10 = sub <2 x i32> %tmp4, %tmp9a ; <<2 x i32>> [#uses=2]
+ %tmp10a = bitcast <2 x i32> %tmp10 to x86_mmx
+ store x86_mmx %tmp10a, x86_mmx* %A
+ %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
+ %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+ %tmp16 = mul <2 x i32> %tmp10b, %tmp15a ; <<2 x i32>> [#uses=2]
+ %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+ store x86_mmx %tmp16a, x86_mmx* %A
+ %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
+ %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
+ %tmp22 = and <2 x i32> %tmp16b, %tmp21a ; <<2 x i32>> [#uses=2]
+ %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
+ store x86_mmx %tmp22a, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
+ %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
+ %tmp28 = or <2 x i32> %tmp22b, %tmp27a ; <<2 x i32>> [#uses=2]
+ %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
+ %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
+ %tmp34 = xor <2 x i32> %tmp28b, %tmp33a ; <<2 x i32>> [#uses=1]
+ %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
+ store x86_mmx %tmp34a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-define void @bar(<4 x i16>* %A, <4 x i16>* %B) {
+define void @bar(x86_mmx* %A, x86_mmx* %B) {
entry:
- %tmp1 = load <4 x i16>* %A ; <<4 x i16>> [#uses=1]
- %tmp3 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp4 = add <4 x i16> %tmp1, %tmp3 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp4, <4 x i16>* %A
- %tmp7 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp12 = tail call <4 x i16> @llvm.x86.mmx.padds.w( <4 x i16> %tmp4, <4 x i16> %tmp7 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp12, <4 x i16>* %A
- %tmp16 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp21 = tail call <4 x i16> @llvm.x86.mmx.paddus.w( <4 x i16> %tmp12, <4 x i16> %tmp16 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp21, <4 x i16>* %A
- %tmp27 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp28 = sub <4 x i16> %tmp21, %tmp27 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp28, <4 x i16>* %A
- %tmp31 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp36 = tail call <4 x i16> @llvm.x86.mmx.psubs.w( <4 x i16> %tmp28, <4 x i16> %tmp31 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp36, <4 x i16>* %A
- %tmp40 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp45 = tail call <4 x i16> @llvm.x86.mmx.psubus.w( <4 x i16> %tmp36, <4 x i16> %tmp40 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp45, <4 x i16>* %A
- %tmp51 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp52 = mul <4 x i16> %tmp45, %tmp51 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp52, <4 x i16>* %A
- %tmp55 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp60 = tail call <4 x i16> @llvm.x86.mmx.pmulh.w( <4 x i16> %tmp52, <4 x i16> %tmp55 ) ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp60, <4 x i16>* %A
- %tmp64 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp69 = tail call <2 x i32> @llvm.x86.mmx.pmadd.wd( <4 x i16> %tmp60, <4 x i16> %tmp64 ) ; <<2 x i32>> [#uses=1]
- %tmp70 = bitcast <2 x i32> %tmp69 to <4 x i16> ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp70, <4 x i16>* %A
- %tmp75 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp76 = and <4 x i16> %tmp70, %tmp75 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp76, <4 x i16>* %A
- %tmp81 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp82 = or <4 x i16> %tmp76, %tmp81 ; <<4 x i16>> [#uses=2]
- store <4 x i16> %tmp82, <4 x i16>* %A
- %tmp87 = load <4 x i16>* %B ; <<4 x i16>> [#uses=1]
- %tmp88 = xor <4 x i16> %tmp82, %tmp87 ; <<4 x i16>> [#uses=1]
- store <4 x i16> %tmp88, <4 x i16>* %A
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
+ %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
+ %tmp4 = add <4 x i16> %tmp1a, %tmp3a ; <<4 x i16>> [#uses=2]
+ %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
+ store x86_mmx %tmp4a, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
+ %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
+ %tmp28 = sub <4 x i16> %tmp21a, %tmp27a ; <<4 x i16>> [#uses=2]
+ %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
+ store x86_mmx %tmp28a, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
+ %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
+ %tmp52 = mul <4 x i16> %tmp45a, %tmp51a ; <<4 x i16>> [#uses=2]
+ %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
+ store x86_mmx %tmp52a, x86_mmx* %A
+ %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp60, x86_mmx* %A
+ %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
+ %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
+ %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
+ %tmp76 = and <4 x i16> %tmp70a, %tmp75a ; <<4 x i16>> [#uses=2]
+ %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
+ store x86_mmx %tmp76a, x86_mmx* %A
+ %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
+ %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
+ %tmp82 = or <4 x i16> %tmp76b, %tmp81a ; <<4 x i16>> [#uses=2]
+ %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
+ store x86_mmx %tmp82a, x86_mmx* %A
+ %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
+ %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
+ %tmp88 = xor <4 x i16> %tmp82b, %tmp87a ; <<4 x i16>> [#uses=1]
+ %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
+ store x86_mmx %tmp88a, x86_mmx* %A
tail call void @llvm.x86.mmx.emms( )
ret void
}
-declare <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8>, <8 x i8>)
+;; The following is modified to use MMX intrinsics everywhere they work.
-declare <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8>, <8 x i8>)
+define void @fooa(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp51a = bitcast x86_mmx %tmp51 to i64
+ %tmp51aa = bitcast i64 %tmp51a to <8 x i8>
+ %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8>
+ %tmp52 = mul <8 x i8> %tmp51b, %tmp51aa ; <<8 x i8>> [#uses=1]
+ %tmp52a = bitcast <8 x i8> %tmp52 to i64
+ %tmp52aa = bitcast i64 %tmp52a to x86_mmx
+ store x86_mmx %tmp52aa, x86_mmx* %A
+ %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp51, x86_mmx %tmp57 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp58, x86_mmx* %A
+ %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp64, x86_mmx* %A
+ %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8>, <8 x i8>)
+define void @baza(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp10, x86_mmx* %A
+ %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32>
+ %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
+ %tmp16 = mul <2 x i32> %tmp10a, %tmp15a ; <<2 x i32>> [#uses=1]
+ %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
+ store x86_mmx %tmp16a, x86_mmx* %A
+ %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp22, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp34, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8>, <8 x i8>)
+define void @bara(x86_mmx* %A, x86_mmx* %B) {
+entry:
+ %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
+ %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp4, x86_mmx* %A
+ %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp12, x86_mmx* %A
+ %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp21, x86_mmx* %A
+ %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp28, x86_mmx* %A
+ %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp36, x86_mmx* %A
+ %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp45, x86_mmx* %A
+ %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp52, x86_mmx* %A
+ %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp60, x86_mmx* %A
+ %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
+ %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp70, x86_mmx* %A
+ %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp76, x86_mmx* %A
+ %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp82, x86_mmx* %A
+ %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
+ %tmp88 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 ) ; <x86_mmx> [#uses=2]
+ store x86_mmx %tmp88, x86_mmx* %A
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
-declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)
-declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)
-declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>)
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padds.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psubs.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx)
+
diff --git a/test/CodeGen/X86/mmx-bitcast-to-i64.ll b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
index 1fd8f67..8b1840a 100644
--- a/test/CodeGen/X86/mmx-bitcast-to-i64.ll
+++ b/test/CodeGen/X86/mmx-bitcast-to-i64.ll
@@ -1,26 +1,31 @@
; RUN: llc < %s -march=x86-64 | grep movd | count 4
-define i64 @foo(<1 x i64>* %p) {
- %t = load <1 x i64>* %p
- %u = add <1 x i64> %t, %t
- %s = bitcast <1 x i64> %u to i64
+define i64 @foo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @goo(<2 x i32>* %p) {
- %t = load <2 x i32>* %p
- %u = add <2 x i32> %t, %t
- %s = bitcast <2 x i32> %u to i64
+define i64 @goo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @hoo(<4 x i16>* %p) {
- %t = load <4 x i16>* %p
- %u = add <4 x i16> %t, %t
- %s = bitcast <4 x i16> %u to i64
+define i64 @hoo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
-define i64 @ioo(<8 x i8>* %p) {
- %t = load <8 x i8>* %p
- %u = add <8 x i8> %t, %t
- %s = bitcast <8 x i8> %u to i64
+define i64 @ioo(x86_mmx* %p) {
+ %t = load x86_mmx* %p
+ %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
+ %s = bitcast x86_mmx %u to i64
ret i64 %s
}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
new file mode 100644
index 0000000..3ac0e4e
--- /dev/null
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -0,0 +1,1324 @@
+; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+
+declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpgtb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pcmpeqb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckldq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpcklwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpcklbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhdq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: punpckhbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packuswb
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packssdw
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: packsswb
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrad
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psraw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
+
+define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrlq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to i64
+ ret i64 %2
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psrlw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
+
+define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psllq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to i64
+ ret i64 %2
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
+
+define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pslld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <2 x i32>
+ %3 = bitcast <2 x i32> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
+
+define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: psllw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %mmx_var.i, i32 3) nounwind
+ %2 = bitcast x86_mmx %1 to <4 x i16>
+ %3 = bitcast <4 x i16> %2 to <1 x i64>
+ %4 = extractelement <1 x i64> %3, i32 0
+ ret i64 %4
+}
+
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrad
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psraw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrlq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psrlw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psllq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pslld
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psllw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1.i = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pxor
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: por
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.por(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pandn
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pand
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pand(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmullw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmullw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaddwd
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubusw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubusb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubsb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
+
+declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psubb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddusw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddusb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddsb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddq
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %mmx_var, x86_mmx %mmx_var1)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: paddb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psadbw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pminsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pminub
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaxsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaxub
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pavgw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pavgb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
+
+define void @test25(<1 x i64>* %p, <1 x i64> %a) nounwind optsize ssp {
+; CHECK: movntq
+entry:
+ %mmx_ptr_var.i = bitcast <1 x i64>* %p to x86_mmx*
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var.i = bitcast i64 %0 to x86_mmx
+ tail call void @llvm.x86.mmx.movnt.dq(x86_mmx* %mmx_ptr_var.i, x86_mmx %mmx_var.i) nounwind
+ ret void
+}
+
+declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
+
+define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pmovmskb
+entry:
+ %0 = bitcast <1 x i64> %a to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx
+ %1 = tail call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %mmx_var.i) nounwind
+ ret i32 %1
+}
+
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
+
+define void @test23(<1 x i64> %d, <1 x i64> %n, i8* %p) nounwind optsize ssp {
+; CHECK: maskmovq
+entry:
+ %0 = bitcast <1 x i64> %n to <8 x i8>
+ %1 = bitcast <1 x i64> %d to <8 x i8>
+ %mmx_var.i = bitcast <8 x i8> %1 to x86_mmx
+ %mmx_var1.i = bitcast <8 x i8> %0 to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i, i8* %p) nounwind
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhuw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx
+ %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
+
+define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pshufw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %1 = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 3) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmuludq
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx
+ %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
+
+define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: cvtpi2pd
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %1 = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %1) nounwind readnone
+ ret <2 x double> %2
+}
+
+declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
+
+define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp {
+; CHECK: cvttpd2pi
+entry:
+ %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
+ %1 = bitcast x86_mmx %0 to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to <1 x i64>
+ %3 = extractelement <1 x i64> %2, i32 0
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
+
+define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp {
+; CHECK: cvtpd2pi
+entry:
+ %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
+ %1 = bitcast x86_mmx %0 to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to <1 x i64>
+ %3 = extractelement <1 x i64> %2, i32 0
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
+
+define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: palignr
+entry:
+ %0 = extractelement <1 x i64> %a, i32 0
+ %mmx_var = bitcast i64 %0 to x86_mmx
+ %1 = extractelement <1 x i64> %b, i32 0
+ %mmx_var1 = bitcast i64 %1 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %mmx_var, x86_mmx %mmx_var1, i8 16)
+ %3 = bitcast x86_mmx %2 to i64
+ ret i64 %3
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
+
+define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsd
+entry:
+ %0 = bitcast <1 x i64> %a to <2 x i32>
+ %1 = bitcast <2 x i32> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <2 x i32>
+ %4 = bitcast <2 x i32> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
+
+define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsw
+entry:
+ %0 = bitcast <1 x i64> %a to <4 x i16>
+ %1 = bitcast <4 x i16> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <4 x i16>
+ %4 = bitcast <4 x i16> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
+
+define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp {
+; CHECK: pabsb
+entry:
+ %0 = bitcast <1 x i64> %a to <8 x i8>
+ %1 = bitcast <8 x i8> %0 to x86_mmx
+ %2 = tail call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) nounwind readnone
+ %3 = bitcast x86_mmx %2 to <8 x i8>
+ %4 = bitcast <8 x i8> %3 to <1 x i64>
+ %5 = extractelement <1 x i64> %4, i32 0
+ ret i64 %5
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: psignb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pshufb
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmulhrsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: pmaddubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <8 x i8>
+ %1 = bitcast <1 x i64> %a to <8 x i8>
+ %2 = bitcast <8 x i8> %1 to x86_mmx
+ %3 = bitcast <8 x i8> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <8 x i8>
+ %6 = bitcast <8 x i8> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phsubw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddsw
+entry:
+ %0 = bitcast <1 x i64> %b to <4 x i16>
+ %1 = bitcast <1 x i64> %a to <4 x i16>
+ %2 = bitcast <4 x i16> %1 to x86_mmx
+ %3 = bitcast <4 x i16> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <4 x i16>
+ %6 = bitcast <4 x i16> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
+
+declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
+
+define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
+; CHECK: phaddd
+entry:
+ %0 = bitcast <1 x i64> %b to <2 x i32>
+ %1 = bitcast <1 x i64> %a to <2 x i32>
+ %2 = bitcast <2 x i32> %1 to x86_mmx
+ %3 = bitcast <2 x i32> %0 to x86_mmx
+ %4 = tail call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %2, x86_mmx %3) nounwind readnone
+ %5 = bitcast x86_mmx %4 to <2 x i32>
+ %6 = bitcast <2 x i32> %5 to <1 x i64>
+ %7 = extractelement <1 x i64> %6, i32 0
+ ret i64 %7
+}
diff --git a/test/CodeGen/X86/mmx-insert-element.ll b/test/CodeGen/X86/mmx-insert-element.ll
index a063ee1..348dac8 100644
--- a/test/CodeGen/X86/mmx-insert-element.ll
+++ b/test/CodeGen/X86/mmx-insert-element.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | not grep movq
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep psllq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep movq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pshufd
+; This is not an MMX operation; promoted to XMM.
-define <2 x i32> @qux(i32 %A) nounwind {
+define x86_mmx @qux(i32 %A) nounwind {
%tmp3 = insertelement <2 x i32> < i32 0, i32 undef >, i32 %A, i32 1 ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %tmp3
+ %tmp4 = bitcast <2 x i32> %tmp3 to x86_mmx
+ ret x86_mmx %tmp4
}
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index 3af09f4..6062b50 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
; PR2562
external global i16 ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll
index 0af7e01..689f7bf 100644
--- a/test/CodeGen/X86/mmx-punpckhdq.ll
+++ b/test/CodeGen/X86/mmx-punpckhdq.ll
@@ -1,6 +1,9 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep punpckhdq | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=x86_64-apple-darwin10 | FileCheck %s
+; There are no MMX operations in bork; promoted to XMM.
define void @bork(<1 x i64>* %x) {
+; CHECK: bork
+; CHECK: pextrd
entry:
%tmp2 = load <1 x i64>* %x ; <<1 x i64>> [#uses=1]
%tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32> ; <<2 x i32>> [#uses=1]
@@ -11,4 +14,18 @@ entry:
ret void
}
+; pork uses MMX.
+
+define void @pork(x86_mmx* %x) {
+; CHECK: pork
+; CHECK: punpckhdq
+entry:
+ %tmp2 = load x86_mmx* %x ; <x86_mmx> [#uses=1]
+ %tmp9 = tail call x86_mmx @llvm.x86.mmx.punpckhdq (x86_mmx %tmp2, x86_mmx %tmp2)
+ store x86_mmx %tmp9, x86_mmx* %x
+ tail call void @llvm.x86.mmx.emms( )
+ ret void
+}
+
+declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
diff --git a/test/CodeGen/X86/mmx-shift.ll b/test/CodeGen/X86/mmx-shift.ll
index dd0aa2c..bafc754 100644
--- a/test/CodeGen/X86/mmx-shift.ll
+++ b/test/CodeGen/X86/mmx-shift.ll
@@ -5,28 +5,28 @@
define i64 @t1(<1 x i64> %mm1) nounwind {
entry:
- %tmp6 = tail call <1 x i64> @llvm.x86.mmx.pslli.q( <1 x i64> %mm1, i32 32 ) ; <<1 x i64>> [#uses=1]
- %retval1112 = bitcast <1 x i64> %tmp6 to i64 ; <i64> [#uses=1]
+ %tmp = bitcast <1 x i64> %mm1 to x86_mmx
+ %tmp6 = tail call x86_mmx @llvm.x86.mmx.pslli.q( x86_mmx %tmp, i32 32 ) ; <x86_mmx> [#uses=1]
+ %retval1112 = bitcast x86_mmx %tmp6 to i64
ret i64 %retval1112
}
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
-define i64 @t2(<2 x i32> %mm1, <2 x i32> %mm2) nounwind {
+define i64 @t2(x86_mmx %mm1, x86_mmx %mm2) nounwind {
entry:
- %tmp7 = tail call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %mm1, <2 x i32> %mm2 ) nounwind readnone ; <<2 x i32>> [#uses=1]
- %retval1112 = bitcast <2 x i32> %tmp7 to i64 ; <i64> [#uses=1]
+ %tmp7 = tail call x86_mmx @llvm.x86.mmx.psra.d( x86_mmx %mm1, x86_mmx %mm2 ) nounwind readnone ; <x86_mmx> [#uses=1]
+ %retval1112 = bitcast x86_mmx %tmp7 to i64
ret i64 %retval1112
}
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
-define i64 @t3(<1 x i64> %mm1, i32 %bits) nounwind {
+define i64 @t3(x86_mmx %mm1, i32 %bits) nounwind {
entry:
- %tmp6 = bitcast <1 x i64> %mm1 to <4 x i16> ; <<4 x i16>> [#uses=1]
- %tmp8 = tail call <4 x i16> @llvm.x86.mmx.psrli.w( <4 x i16> %tmp6, i32 %bits ) nounwind readnone ; <<4 x i16>> [#uses=1]
- %retval1314 = bitcast <4 x i16> %tmp8 to i64 ; <i64> [#uses=1]
+ %tmp8 = tail call x86_mmx @llvm.x86.mmx.psrli.w( x86_mmx %mm1, i32 %bits ) nounwind readnone ; <x86_mmx> [#uses=1]
+ %retval1314 = bitcast x86_mmx %tmp8 to i64
ret i64 %retval1314
}
-declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index e3125c7..9f7501e 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -22,8 +22,10 @@ entry:
%tmp542 = bitcast <2 x i32> %tmp529 to <4 x i16> ; <<4 x i16>> [#uses=1]
%tmp543 = add <4 x i16> %tmp542, < i16 0, i16 16448, i16 24672, i16 28784 > ; <<4 x i16>> [#uses=1]
%tmp555 = bitcast <4 x i16> %tmp543 to <8 x i8> ; <<8 x i8>> [#uses=1]
- tail call void @llvm.x86.mmx.maskmovq( <8 x i8> zeroinitializer, <8 x i8> %tmp555, i8* null )
+ %tmp556 = bitcast <8 x i8> %tmp555 to x86_mmx
+ %tmp557 = bitcast <8 x i8> zeroinitializer to x86_mmx
+ tail call void @llvm.x86.mmx.maskmovq( x86_mmx %tmp557, x86_mmx %tmp556, i8* null )
ret void
}
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i8*)
+declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*)
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
index 8253c20..a7ce7d9 100644
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep punpckldq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
%struct.vS1024 = type { [8 x <4 x i32>] }
%struct.vS512 = type { [4 x <4 x i32>] }
-declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
+declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
define void @t() nounwind {
entry:
@@ -12,14 +12,18 @@ entry:
bb554: ; preds = %bb554, %entry
%sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ] ; <<1 x i64>> [#uses=1]
- %0 = load <1 x i64>* null, align 8 ; <<1 x i64>> [#uses=2]
- %1 = bitcast <1 x i64> %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
+ %0 = load x86_mmx* null, align 8 ; <<1 x i64>> [#uses=2]
+ %1 = bitcast x86_mmx %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
%tmp555 = and <2 x i32> %1, < i32 -1, i32 0 > ; <<2 x i32>> [#uses=1]
- %2 = bitcast <2 x i32> %tmp555 to <1 x i64> ; <<1 x i64>> [#uses=1]
- %3 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %0, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
+ %2 = bitcast <2 x i32> %tmp555 to x86_mmx ; <<1 x i64>> [#uses=1]
+ %3 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null
- %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %2 ; <<1 x i64>> [#uses=1]
- %4 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %tmp558, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
- %tmp562 = add <1 x i64> %4, %3 ; <<1 x i64>> [#uses=1]
+ %tmp3 = bitcast x86_mmx %2 to <1 x i64>
+ %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %tmp3 ; <<1 x i64>> [#uses=1]
+ %tmp5 = bitcast <1 x i64> %tmp558 to x86_mmx
+ %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %tmp5, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
+ %tmp6 = bitcast x86_mmx %4 to <1 x i64>
+ %tmp7 = bitcast x86_mmx %3 to <1 x i64>
+ %tmp562 = add <1 x i64> %tmp6, %tmp7 ; <<1 x i64>> [#uses=1]
br label %bb554
}
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
index d21e240..191e261 100644
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ b/test/CodeGen/X86/mmx-vzmovl.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movd
-; RUN: llc < %s -march=x86-64 -mattr=+mmx | grep movq
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep movq | count 2
+; There are no MMX operations here; this is promoted to XMM.
define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
entry:
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index b04048b..00190e8 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -1,8 +1,57 @@
-; RUN: llc < %s -march=x86 | grep gs
+; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64
-define i32 @foo() nounwind readonly {
+define i32 @test1() nounwind readonly {
entry:
%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1]
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
ret i32 %tmp1
}
+; X32: test1:
+; X32: movl %gs:196, %eax
+; X32: movl (%eax), %eax
+; X32: ret
+
+; X64: test1:
+; X64: movq %gs:320, %rax
+; X64: movl (%rax), %eax
+; X64: ret
+
+define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind {
+entry:
+ %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8
+ tail call void %tmp9(i8* undef) nounwind optsize
+ ret i64 0
+}
+
+; rdar://8453210
+; X32: test2:
+; X32: movl {{.*}}(%esp), %eax
+; X32: calll *%gs:(%eax)
+
+; X64: test2:
+; X64: callq *%gs:(%rdi)
+
+
+
+
+define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
+entry:
+ %0 = load i64 addrspace(256)* %p
+ %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
+ %1 = bitcast <2 x i64> %tmp2 to <8 x i16>
+ %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+
+; X32: pmovsxwd_1:
+; X32: movl 4(%esp), %eax
+; X32: pmovsxwd %gs:(%eax), %xmm0
+; X32: ret
+
+; X64: pmovsxwd_1:
+; X64: pmovsxwd %gs:(%rdi), %xmm0
+; X64: ret
+}
+
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/mult-alt-generic-i686.ll b/test/CodeGen/X86/mult-alt-generic-i686.ll
new file mode 100644
index 0000000..7c3499f
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-generic-i686.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=x86
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/mult-alt-generic-x86_64.ll b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
new file mode 100644
index 0000000..f35bb5e
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-generic-x86_64.ll
@@ -0,0 +1,321 @@
+; RUN: llc < %s -march=x86-64
+; ModuleID = 'mult-alt-generic.c'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@marray = common global [2 x i32] zeroinitializer, align 4
+
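+; Same generic-constraint matrix as mult-alt-generic-i686.ll, run through the
+; x86-64 backend instead.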
+define void @single_m() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,*m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32* @min1) nounwind
+ ret void
+}
+
+define void @single_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @single_V() nounwind {
+entry:
+ ret void
+}
+
+define void @single_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @single_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r,F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @single_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @single_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @single_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @single_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r,im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_m() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_o() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %index = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %index, align 4
+ ret void
+}
+
+define void @multi_V() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_lt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_gt() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* %in1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_r() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_i() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|i,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_n() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|n,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_E() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|E,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_F() nounwind {
+entry:
+ %out0 = alloca double, align 8
+ store double 0.000000e+000, double* %out0, align 8
+; No lowering support.
+; %0 = call double asm "foo $1,$0", "=r|r,r|F,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+; store double %0, double* %out0, align 8
+ ret void
+}
+
+define void @multi_s() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_g() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_X() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ %in1 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ store i32 1, i32* %in1, align 4
+ %tmp = load i32* %in1, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* %out0, align 4
+ %tmp1 = load i32* @min1, align 4
+ %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
+ store i32 %1, i32* %out0, align 4
+ %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
+ store i32 %2, i32* %out0, align 4
+ %3 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %3, i32* %out0, align 4
+ %4 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+001) nounwind
+ store i32 %4, i32* %out0, align 4
+ %5 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(double 1.000000e+000) nounwind
+ store i32 %5, i32* %out0, align 4
+ ret void
+}
+
+define void @multi_p() nounwind {
+entry:
+ %out0 = alloca i32, align 4
+ store i32 0, i32* %out0, align 4
+ %0 = call i32 asm "foo $1,$0", "=r|r,r|im,~{dirflag},~{fpsr},~{flags}"(i32* getelementptr inbounds ([2 x i32]* @marray, i32 0, i32 0)) nounwind
+ store i32 %0, i32* %out0, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/mult-alt-x86.ll b/test/CodeGen/X86/mult-alt-x86.ll
new file mode 100644
index 0000000..06175da
--- /dev/null
+++ b/test/CodeGen/X86/mult-alt-x86.ll
@@ -0,0 +1,358 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2
+; ModuleID = 'mult-alt-x86.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i686-pc-win32"
+
+@mout0 = common global i32 0, align 4
+@min1 = common global i32 0, align 4
+@dout0 = common global double 0.000000e+000, align 8
+@din1 = common global double 0.000000e+000, align 8
+@marray = common global [2 x i32] zeroinitializer, align 4
+
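+; x86-specific constraints: register classes (R, q, Q, a, b, c, d, S, D, A),
+; x87/MMX/SSE classes (f, t, u, y, x, Y0), and immediate or constant ranges
+; (I, J, K, L, M, N, G, C, e, Z), each in single- and multi-alternative form.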
+define void @single_R() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=R,R,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=q,q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_Q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=Q,Q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_a() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={ax},{ax},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_b() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={bx},{bx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_c() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={cx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_d() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={dx},{dx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_S() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={si},{si},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_D() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "={di},{di},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_A() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ %0 = call i32 asm "foo $1,$0", "=A,A,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
+ store i32 %0, i32* @mout0, align 4
+ ret void
+}
+
+define void @single_f() nounwind {
+entry:
+ ret void
+}
+
+define void @single_t() nounwind {
+entry:
+ ret void
+}
+
+define void @single_u() nounwind {
+entry:
+ ret void
+}
+
+define void @single_y() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ %0 = call double asm "foo $1,$0", "=y,y,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
+ store double %0, double* @dout0, align 8
+ ret void
+}
+
+define void @single_x() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ %0 = call double asm "foo $1,$0", "=x,x,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
+ store double %0, double* @dout0, align 8
+ ret void
+}
+
+define void @single_Y0() nounwind {
+entry:
+ ret void
+}
+
+define void @single_I() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,I,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_J() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,J,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_K() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,K,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_L() nounwind {
+entry:
+; Missing lowering support for 'L'.
+; call void asm "foo $1,$0", "=*m,L,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_M() nounwind {
+entry:
+; Missing lowering support for 'M'.
+; call void asm "foo $1,$0", "=*m,M,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_N() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,N,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_G() nounwind {
+entry:
+; Missing lowering support for 'G'.
+; call void asm "foo $1,$0", "=*m,G,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @single_C() nounwind {
+entry:
+; Missing lowering support for 'C'.
+; call void asm "foo $1,$0", "=*m,C,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @single_e() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,e,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @single_Z() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*m,Z,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_R() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|R|m,r|R|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|q|m,r|q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_Q() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|Q|m,r|Q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_a() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_b() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{bx}|m,r|{bx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_c() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_d() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{dx}|m,r|{dx},~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_S() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{si}|m,r|{si}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_D() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|{di}|m,r|{di}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_A() nounwind {
+entry:
+ %tmp = load i32* @min1, align 4
+ call void asm "foo $1,$0", "=*r|A|m,r|A|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
+ ret void
+}
+
+define void @multi_f() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_t() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_u() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_y() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ call void asm "foo $1,$0", "=*r|y|m,r|y|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
+ ret void
+}
+
+define void @multi_x() nounwind {
+entry:
+ %tmp = load double* @din1, align 8
+ call void asm "foo $1,$0", "=*r|x|m,r|x|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
+ ret void
+}
+
+define void @multi_Y0() nounwind {
+entry:
+ ret void
+}
+
+define void @multi_I() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|I|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_J() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|J|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_K() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|K|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_L() nounwind {
+entry:
+; Missing lowering support for 'L'.
+; call void asm "foo $1,$0", "=*r|m|m,r|L|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_M() nounwind {
+entry:
+; Missing lowering support for 'M'.
+; call void asm "foo $1,$0", "=*r|m|m,r|M|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_N() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|N|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_G() nounwind {
+entry:
+; Missing lowering support for 'G'.
+; call void asm "foo $1,$0", "=*r|m|m,r|G|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @multi_C() nounwind {
+entry:
+; Missing lowering support for 'C'.
+; call void asm "foo $1,$0", "=*r|m|m,r|C|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, double 1.000000e+000) nounwind
+ ret void
+}
+
+define void @multi_e() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|e|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
+
+define void @multi_Z() nounwind {
+entry:
+ call void asm "foo $1,$0", "=*r|m|m,r|Z|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 1) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
new file mode 100644
index 0000000..53b0388
--- /dev/null
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -0,0 +1,65 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; DAGCombiner should fold this code in finite time.
+; rdar://8606584
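+; Before the fix, the shl/and/or chain below kept DAGCombiner looping instead
+; of reaching a fixed point.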
+
+define void @D() nounwind readnone {
+bb.nph:
+ br label %while.cond
+
+while.cond: ; preds = %while.cond, %bb.nph
+ %tmp6 = load i32* undef, align 4
+ %and = or i64 undef, undef
+ %conv11 = zext i32 undef to i64
+ %conv14 = zext i32 %tmp6 to i64
+ %shl15 = shl i64 %conv14, 1
+ %shl15.masked = and i64 %shl15, 4294967294
+ %and17 = or i64 %shl15.masked, %conv11
+ %add = add i64 %and17, 1
+ %xor = xor i64 %add, %and
+ %tmp20 = load i64* undef, align 8
+ %add21 = add i64 %xor, %tmp20
+ %conv22 = trunc i64 %add21 to i32
+ store i32 %conv22, i32* undef, align 4
+ br i1 false, label %while.end, label %while.cond
+
+while.end: ; preds = %while.cond
+ ret void
+}
+
+
+; DAGCombiner shouldn't fold the sdiv (ashr) away.
+; rdar://8636812
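+; With i = 127, the trunc/sext pair wraps 127*2 = 254 to -2, so the sdiv must
+; see -2 and produce -1; folding it into a plain shift would yield 127 and the
+; program would abort.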
+; CHECK: main:
+; CHECK: sarl
+
+define i32 @main() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %j = alloca i8, align 1
+ store i32 127, i32* %i, align 4
+ store i8 0, i8* %j, align 1
+ %tmp3 = load i32* %i, align 4
+ %mul = mul nsw i32 %tmp3, 2
+ %conv4 = trunc i32 %mul to i8
+ %conv5 = sext i8 %conv4 to i32
+ %div6 = sdiv i32 %conv5, 2
+ %conv7 = trunc i32 %div6 to i8
+ %conv9 = sext i8 %conv7 to i32
+ %cmp = icmp eq i32 %conv9, -1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ ret i32 0
+
+if.end: ; preds = %entry
+ call void @abort() noreturn
+ unreachable
+}
+
+declare void @abort() noreturn
+
+declare void @exit(i32) noreturn
diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll
deleted file mode 100644
index 796ef7a..0000000
--- a/test/CodeGen/X86/narrow_op-2.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
- %struct.bf = type { i64, i16, i16, i32 }
-@bfi = external global %struct.bf*
-
-define void @t1() nounwind ssp {
-entry:
-
-; CHECK: andb $-2, 10(
-; CHECK: andb $-3, 10(
-
- %0 = load %struct.bf** @bfi, align 8
- %1 = getelementptr %struct.bf* %0, i64 0, i32 1
- %2 = bitcast i16* %1 to i32*
- %3 = load i32* %2, align 1
- %4 = and i32 %3, -65537
- store i32 %4, i32* %2, align 1
- %5 = load %struct.bf** @bfi, align 8
- %6 = getelementptr %struct.bf* %5, i64 0, i32 1
- %7 = bitcast i16* %6 to i32*
- %8 = load i32* %7, align 1
- %9 = and i32 %8, -131073
- store i32 %9, i32* %7, align 1
- ret void
-}
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index 8bed624..ef02af2 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -1,5 +1,5 @@
+; RUN: llc < %s -march=x86 -stats |& grep {Number of blocks eliminated} | grep 6
; PR1296
-; RUN: llc < %s -march=x86 | grep {movl \$1} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-2.ll b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
index 23c509c..13e804d 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-2.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep mov | count 5
+; RUN: llc < %s -march=x86 | grep mov | count 4
; PR2659
define i32 @binomial(i32 %n, i32 %k) nounwind {
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index a1a9759..dc5fcd7 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -12,7 +12,7 @@ entry:
ret void
; LINUX: test0:
-; LINUX: call .L0$pb
+; LINUX: calll .L0$pb
; LINUX-NEXT: .L0$pb:
; LINUX-NEXT: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L0$pb),
@@ -34,7 +34,7 @@ entry:
ret void
; LINUX: test1:
-; LINUX: call .L1$pb
+; LINUX: calll .L1$pb
; LINUX-NEXT: .L1$pb:
; LINUX-NEXT: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L1$pb), %eax
@@ -54,12 +54,12 @@ entry:
; LINUX: test2:
; LINUX: pushl %ebx
; LINUX-NEXT: subl $8, %esp
-; LINUX-NEXT: call .L2$pb
+; LINUX-NEXT: calll .L2$pb
; LINUX-NEXT: .L2$pb:
; LINUX-NEXT: popl %ebx
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L2$pb), %ebx
; LINUX: movl $40, (%esp)
-; LINUX: call malloc@PLT
+; LINUX: calll malloc@PLT
; LINUX: addl $8, %esp
; LINUX: popl %ebx
; LINUX: ret
@@ -75,13 +75,13 @@ entry:
call void(...)* %tmp1()
ret void
; LINUX: test3:
-; LINUX: call .L3$pb
+; LINUX: calll .L3$pb
; LINUX-NEXT: .L3$pb:
; LINUX: popl
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
; LINUX: movl pfoo@GOT(%[[REG3]]),
-; LINUX: call afoo@PLT
-; LINUX: call *
+; LINUX: calll afoo@PLT
+; LINUX: calll *
}
declare void(...)* @afoo(...)
@@ -91,10 +91,10 @@ entry:
call void(...)* @foo()
ret void
; LINUX: test4:
-; LINUX: call .L4$pb
+; LINUX: calll .L4$pb
; LINUX: popl %ebx
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L4$pb), %ebx
-; LINUX: call foo@PLT
+; LINUX: calll foo@PLT
}
declare void @foo(...)
@@ -112,7 +112,7 @@ entry:
ret void
; LINUX: test5:
-; LINUX: call .L5$pb
+; LINUX: calll .L5$pb
; LINUX-NEXT: .L5$pb:
; LINUX-NEXT: popl %eax
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L5$pb), %eax
@@ -134,7 +134,7 @@ entry:
; LINUX: .LCPI6_0:
; LINUX: test6:
-; LINUX: call .L6$pb
+; LINUX: calll .L6$pb
; LINUX: .L6$pb:
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L6$pb),
; LINUX: fldl .LCPI6_0@GOTOFF(
@@ -186,7 +186,7 @@ bb12:
ret void
; LINUX: test7:
-; LINUX: call .L7$pb
+; LINUX: calll .L7$pb
; LINUX: .L7$pb:
; LINUX: addl $_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L7$pb),
; LINUX: .LJTI7_0@GOTOFF(
diff --git a/test/CodeGen/X86/postra-licm.ll b/test/CodeGen/X86/postra-licm.ll
index 97cc7b4..902c69b 100644
--- a/test/CodeGen/X86/postra-licm.ll
+++ b/test/CodeGen/X86/postra-licm.ll
@@ -68,7 +68,7 @@ bb26.preheader: ; preds = %imix_test.exit
bb23: ; preds = %imix_test.exit
unreachable
-; X86-32: %bb26.preheader.bb28_crit_edge
+; X86-32: %bb26.preheader
; X86-32: movl -16(%ebp),
; X86-32-NEXT: .align 4
; X86-32-NEXT: %bb28
diff --git a/test/CodeGen/X86/pr2659.ll b/test/CodeGen/X86/pr2659.ll
index e5daf5d..54d043d 100644
--- a/test/CodeGen/X86/pr2659.ll
+++ b/test/CodeGen/X86/pr2659.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 5
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | grep movl | count 4
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin9.4.0 | FileCheck %s
; PR2659
@@ -14,10 +14,11 @@ forcond.preheader: ; preds = %entry
%cmp44 = icmp eq i32 %k, 0 ; <i1> [#uses=1]
br i1 %cmp44, label %afterfor, label %forbody
-; CHECK: %forcond.preheader.forbody_crit_edge
+; CHECK: %forcond.preheader
; CHECK: movl $1
; CHECK-NOT: xorl
-; CHECK-NEXT: movl
+; CHECK-NOT: movl
+; CHECK-NEXT: je
ifthen: ; preds = %entry
ret i32 0
diff --git a/test/CodeGen/X86/pr3522.ll b/test/CodeGen/X86/pr3522.ll
index 7cdeaa0..da16237 100644
--- a/test/CodeGen/X86/pr3522.ll
+++ b/test/CodeGen/X86/pr3522.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& not grep machine-sink
+; RUN: llc < %s -march=x86 -stats |& not grep {instructions sunk}
; PR3522
target triple = "i386-pc-linux-gnu"
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index 48ca36c..d9c3061 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,21 +1,21 @@
; RUN: llc < %s -march=x86 | \
; RUN: grep {s\[ah\]\[rl\]l} | count 1
-define i32* @test1(i32* %P, i32 %X) {
+define i32* @test1(i32* %P, i32 %X) nounwind {
%Y = lshr i32 %X, 2 ; <i32> [#uses=1]
%gep.upgrd.1 = zext i32 %Y to i64 ; <i64> [#uses=1]
%P2 = getelementptr i32* %P, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
ret i32* %P2
}
-define i32* @test2(i32* %P, i32 %X) {
+define i32* @test2(i32* %P, i32 %X) nounwind {
%Y = shl i32 %X, 2 ; <i32> [#uses=1]
%gep.upgrd.2 = zext i32 %Y to i64 ; <i64> [#uses=1]
%P2 = getelementptr i32* %P, i64 %gep.upgrd.2 ; <i32*> [#uses=1]
ret i32* %P2
}
-define i32* @test3(i32* %P, i32 %X) {
+define i32* @test3(i32* %P, i32 %X) nounwind {
%Y = ashr i32 %X, 2 ; <i32> [#uses=1]
%P2 = getelementptr i32* %P, i32 %Y ; <i32*> [#uses=1]
ret i32* %P2
diff --git a/test/CodeGen/X86/sibcall-3.ll b/test/CodeGen/X86/sibcall-3.ll
index f0d66cf..f97abe0 100644
--- a/test/CodeGen/X86/sibcall-3.ll
+++ b/test/CodeGen/X86/sibcall-3.ll
@@ -3,7 +3,7 @@
define void @t1(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind {
; CHECK: t1:
-; CHECK: call 0
+; CHECK: calll 0
tail call void null(i8* inreg %dst, i8* inreg %src, i8* inreg %len) nounwind
ret void
}
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index a3c9957..de2a81e 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
-; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
-; Darwin 8 generates stubs, which don't match
-; XFAIL: apple-darwin8
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
define void @t1(i32 %x) nounwind ssp {
entry:
@@ -45,7 +43,7 @@ declare i32 @foo3()
define void @t4(void (i32)* nocapture %x) nounwind ssp {
entry:
; 32: t4:
-; 32: call *
+; 32: calll *
; FIXME: gcc can generate a tailcall for this. But it's tricky.
; 64: t4:
@@ -71,7 +69,7 @@ entry:
define i32 @t6(i32 %x) nounwind ssp {
entry:
; 32: t6:
-; 32: call {{_?}}t6
+; 32: calll {{_?}}t6
; 32: jmp {{_?}}bar
; 64: t6:
@@ -108,7 +106,7 @@ declare i32 @bar2(i32, i32, i32)
define signext i16 @t8() nounwind ssp {
entry:
; 32: t8:
-; 32: call {{_?}}bar3
+; 32: calll {{_?}}bar3
; 64: t8:
; 64: callq {{_?}}bar3
@@ -121,7 +119,7 @@ declare signext i16 @bar3()
define signext i16 @t9(i32 (i32)* nocapture %x) nounwind ssp {
entry:
; 32: t9:
-; 32: call *
+; 32: calll *
; 64: t9:
; 64: callq *
@@ -133,7 +131,7 @@ entry:
define void @t10() nounwind ssp {
entry:
; 32: t10:
-; 32: call
+; 32: calll
; 64: t10:
; 64: callq
@@ -205,12 +203,12 @@ declare i32 @foo6(i32, i32, %struct.t* byval align 4)
define %struct.ns* @t13(%struct.cp* %yy) nounwind ssp {
; 32: t13:
; 32-NOT: jmp
-; 32: call
+; 32: calll
; 32: ret
; 64: t13:
; 64-NOT: jmp
-; 64: call
+; 64: callq
; 64: ret
entry:
%0 = tail call fastcc %struct.ns* @foo7(%struct.cp* byval align 4 %yy, i8 signext 0) nounwind
@@ -248,7 +246,7 @@ entry:
define void @t15(%struct.foo* noalias sret %agg.result) nounwind {
; 32: t15:
-; 32: call {{_?}}f
+; 32: calll {{_?}}f
; 32: ret $4
; 64: t15:
@@ -263,7 +261,7 @@ declare void @f(%struct.foo* noalias sret) nounwind
define void @t16() nounwind ssp {
entry:
; 32: t16:
-; 32: call {{_?}}bar4
+; 32: calll {{_?}}bar4
; 32: fstp
; 64: t16:
@@ -293,7 +291,7 @@ declare void @bar5(...)
define void @t18() nounwind ssp {
entry:
; 32: t18:
-; 32: call {{_?}}bar6
+; 32: calll {{_?}}bar6
; 32: fstp %st(0)
; 64: t18:
@@ -309,7 +307,7 @@ define void @t19() alignstack(32) nounwind {
entry:
; CHECK: t19:
; CHECK: andl $-32
-; CHECK: call {{_?}}foo
+; CHECK: calll {{_?}}foo
tail call void @foo() nounwind
ret void
}
@@ -323,7 +321,7 @@ declare void @foo()
define double @t20(double %x) nounwind {
entry:
; 32: t20:
-; 32: call {{_?}}foo20
+; 32: calll {{_?}}foo20
; 32: fldl (%esp)
; 64: t20:
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index acba528..31f41ee 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -6,10 +6,11 @@
; that it's conditionally evaluated.
; CHECK: foo:
-; CHECK: divsd
; CHECK-NEXT: testb $1, %dil
-; CHECK-NEXT: jne
+; CHECK-NEXT: je
; CHECK-NEXT: divsd
+; CHECK-NEXT: ret
+; CHECK: divsd
define double @foo(double %x, double %y, i1 %c) nounwind {
%a = fdiv double %x, 3.2
@@ -18,6 +19,24 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
ret double %z
}
+; Make sure the critical edge is broken so the divsd is sunk below
+; the conditional branch.
+; rdar://8454886
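+; The fdiv feeds only the true side of the select, so once the edge is split
+; it should execute only when %c is set.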
+
+; CHECK: split:
+; CHECK-NEXT: testb $1, %dil
+; CHECK-NEXT: je
+; CHECK-NEXT: divsd
+; CHECK-NEXT: ret
+; CHECK: movaps
+; CHECK-NEXT: ret
+define double @split(double %x, double %y, i1 %c) nounwind {
+ %a = fdiv double %x, 3.2
+ %z = select i1 %c, double %a, double %y
+ ret double %z
+}
+
+
; Hoist floating-point constant-pool loads out of loops.
; CHECK: bar:
@@ -68,9 +87,9 @@ return:
; Codegen should hoist and CSE these constants.
; CHECK: vv:
-; CHECK: LCPI2_0(%rip), %xmm0
-; CHECK: LCPI2_1(%rip), %xmm1
-; CHECK: LCPI2_2(%rip), %xmm2
+; CHECK: LCPI3_0(%rip), %xmm0
+; CHECK: LCPI3_1(%rip), %xmm1
+; CHECK: LCPI3_2(%rip), %xmm2
; CHECK: align
; CHECK-NOT: LCPI
; CHECK: ret
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index 6e7aad8..73f88ae 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -18,3 +18,28 @@ define <8 x i16> @test2(<8 x i32> %a) nounwind {
; %c = sext <4 x i16> %a to <4 x i32> ; <<4 x i32>> [#uses=1]
; ret <4 x i32> %c
;}
+
+; Codegen should not emit shuffles to populate the top 2 elements of the
+; 4-element vector this function ends up returning.
+; rdar://8368414
+define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
+ %tmp5 = extractelement <2 x float> %A, i32 1
+ %tmp3 = extractelement <2 x float> %B, i32 0
+ %tmp1 = extractelement <2 x float> %B, i32 1
+ %add.r = fadd float %tmp7, %tmp3
+ %add.i = fsub float %tmp5, %tmp1
+ %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+ %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+ ret <2 x float> %tmp9
+; CHECK: test4:
+; CHECK-NOT: shufps $16
+; CHECK: shufps $1,
+; CHECK-NOT: shufps $16
+; CHECK: shufps $1,
+; CHECK-NOT: shufps $16
+; CHECK: unpcklps
+; CHECK-NOT: shufps $16
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 20b8eac..6fc0190 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -1,14 +1,14 @@
; Tests for SSE2 and below, without SSE3+.
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
-define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
%tmp3 = load <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
-; CHECK: t1:
+; CHECK: test1:
; CHECK: movl 8(%esp), %eax
; CHECK-NEXT: movapd (%eax), %xmm0
; CHECK-NEXT: movlpd 12(%esp), %xmm0
@@ -17,14 +17,14 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-NEXT: ret
}
-define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
%tmp3 = load <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
-; CHECK: t2:
+; CHECK: test2:
; CHECK: movl 8(%esp), %eax
; CHECK-NEXT: movapd (%eax), %xmm0
; CHECK-NEXT: movhpd 12(%esp), %xmm0
@@ -32,3 +32,163 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: ret
}
+
+
+define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
+ %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]
+ %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2]
+ %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
+ %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
+ %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
+ %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1]
+ %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp13, <4 x float>* %res
+ ret void
+; CHECK: @test3
+; CHECK: unpcklps
+}
+
+define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
+ %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp5, <4 x float>* %res
+ ret void
+; CHECK: @test4
+; CHECK: pshufd $50, %xmm0, %xmm0
+}
+
+define <4 x i32> @test5(i8** %ptr) nounwind {
+; CHECK: test5:
+; CHECK: pxor
+; CHECK: punpcklbw
+; CHECK: punpcklwd
+
+ %tmp = load i8** %ptr ; <i8*> [#uses=1]
+ %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
+ %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1]
+ %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
+ %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1]
+ %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
+ %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
+ %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp36
+}
+
+define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
+ %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1]
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp2, <4 x float>* %res
+ ret void
+
+; CHECK: test6:
+; CHECK: movaps (%eax), %xmm0
+; CHECK: movaps %xmm0, (%eax)
+}
+
+define void @test7() nounwind {
+ bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1]
+ shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
+ store <4 x float> %2, <4 x float>* null
+ ret void
+
+; CHECK: test7:
+; CHECK: pxor %xmm0, %xmm0
+; CHECK: movaps %xmm0, 0
+}
+
+@x = external global [4 x i32]
+
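+; Four consecutive scalar loads from @x build the whole vector, so they should
+; merge into a single unaligned vector load.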
+define <2 x i64> @test8() nounwind {
+ %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
+ %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
+ %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
+ %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1]
+ %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp16
+; CHECK: test8:
+; CHECK: movups (%eax), %xmm0
+}
+
+define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
+ %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp13
+; CHECK: test9:
+; CHECK: movups 8(%esp), %xmm0
+}
+
+define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
+ %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp13
+; CHECK: test10:
+; CHECK: movaps 4(%esp), %xmm0
+}
+
+define <2 x double> @test11(double %a, double %b) nounwind {
+ %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
+ %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
+ ret <2 x double> %tmp7
+; CHECK: test11:
+; CHECK: movapd 4(%esp), %xmm0
+}
+
+define void @test12() nounwind {
+ %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp4, <4 x float>* null
+ ret void
+; CHECK: test12:
+; CHECK: movhlps
+; CHECK: shufps
+}
+
+define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+ %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
+ %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1]
+ %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp11, <4 x float>* %res
+ ret void
+; CHECK: test13
+; CHECK: shufps $69, (%eax), %xmm0
+; CHECK: pshufd $-40, %xmm0, %xmm0
+}
+
+define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
+ %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
+ %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2]
+ %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
+ %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
+ %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp27
+; CHECK: test14:
+; CHECK: addps %xmm1, %xmm0
+; CHECK: subps %xmm1, %xmm2
+; CHECK: movlhps %xmm2, %xmm0
+}
+
+define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
+entry:
+ %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
+ %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp4
+; CHECK: test15:
+; CHECK: movhlps %xmm1, %xmm0
+}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 206cdff..9a60091 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -169,7 +169,7 @@ define internal void @t10() nounwind {
ret void
; X64: t10:
; X64: pextrw $4, %xmm0, %eax
-; X64: movlhps %xmm1, %xmm1
+; X64: unpcklpd %xmm1, %xmm1
; X64: pshuflw $8, %xmm1, %xmm1
; X64: pinsrw $2, %eax, %xmm1
; X64: pextrw $6, %xmm0, %eax
@@ -260,3 +260,18 @@ entry:
; X64: pinsrw $1, %eax, %xmm0
; X64: ret
}
+
+; rdar://8520311
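+; Both <undef, undef, 0, 1> shuffle masks only need the low 64 bits duplicated
+; into the high half, which is exactly a movddup load of the low quadword.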
+define <4 x i32> @t17() nounwind {
+entry:
+; X64: t17:
+; X64: movddup (%rax), %xmm0
+ %tmp1 = load <4 x float>* undef, align 16
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+ %tmp3 = load <4 x float>* undef, align 16
+ %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
+ %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+ %tmp7 = and <4 x i32> %tmp6, <i32 undef, i32 undef, i32 -1, i32 0>
+ ret <4 x i32> %tmp7
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index ef66d1a..3a14fa2 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+; This used to compile to insertps $0 + insertps $16. insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
+ %tmp5 = extractelement <2 x float> %A, i32 1
+ %tmp3 = extractelement <2 x float> %B, i32 0
+ %tmp1 = extractelement <2 x float> %B, i32 1
+ %add.r = fadd float %tmp7, %tmp3
+ %add.i = fadd float %tmp5, %tmp1
+ %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+ %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+ ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+
diff --git a/test/CodeGen/X86/stdcall-notailcall.ll b/test/CodeGen/X86/stdcall-notailcall.ll
new file mode 100644
index 0000000..8e33c30
--- /dev/null
+++ b/test/CodeGen/X86/stdcall-notailcall.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=i386-apple-darwin11 -O2 < %s | FileCheck %s
+
+%struct.I = type { i32 (...)** }
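+; As a stdcall function, bar must pop its own 4-byte argument (ret $4), so the
+; call to @foo cannot be turned into a tail jump.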
+define x86_stdcallcc void @bar(%struct.I* nocapture %this) ssp align 2 {
+; CHECK: bar:
+; CHECK-NOT: jmp
+; CHECK: ret $4
+entry:
+ tail call void @foo()
+ ret void
+}
+
+declare void @foo()
diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll
index 70204bc..a7c2517 100644
--- a/test/CodeGen/X86/stdcall.ll
+++ b/test/CodeGen/X86/stdcall.ll
@@ -2,7 +2,7 @@
; PR5851
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
%0 = type { void (...)* }
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 5682e7c..0dd228e 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -1,6 +1,6 @@
; rdar://7860110
-; RUN: llc < %s | FileCheck %s -check-prefix=X64
-; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32
+; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64
+; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.2"
@@ -125,3 +125,44 @@ entry:
; X32: movb %cl, 5(%{{.*}})
}
+; PR7833
+
+@g_16 = internal global i32 -1
+
+; X64: test8:
+; X64-NEXT: movl _g_16(%rip), %eax
+; X64-NEXT: movl $0, _g_16(%rip)
+; X64-NEXT: orl $1, %eax
+; X64-NEXT: movl %eax, _g_16(%rip)
+; X64-NEXT: ret
+define void @test8() nounwind {
+ %tmp = load i32* @g_16
+ store i32 0, i32* @g_16
+ %or = or i32 %tmp, 1
+ store i32 %or, i32* @g_16
+ ret void
+}
+
+; X64: test9:
+; X64-NEXT: orb $1, _g_16(%rip)
+; X64-NEXT: ret
+define void @test9() nounwind {
+ %tmp = load i32* @g_16
+ %or = or i32 %tmp, 1
+ store i32 %or, i32* @g_16
+ ret void
+}
+
+; rdar://8494845 + PR8244
+; X64: test10:
+; X64-NEXT: movsbl (%rdi), %eax
+; X64-NEXT: shrl $8, %eax
+; X64-NEXT: ret
+define i8 @test10(i8* %P) nounwind ssp {
+entry:
+ %tmp = load i8* %P, align 1
+ %conv = sext i8 %tmp to i32
+ %shr3 = lshr i32 %conv, 8
+ %conv2 = trunc i32 %shr3 to i8
+ ret i8 %conv2
+}
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 46e59e9..1168622 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
target datalayout = "e-p:32:32"
%struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 9662ad6..f546ac4 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -62,11 +62,11 @@ declare i8* @choose(i8*, i8*)
; CHECK: tail_duplicate_me:
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
; CHECK: movl $0, GHJK(%rip)
-; CHECK-NEXT: jmpq *%rbx
+; CHECK-NEXT: jmpq *%r
define void @tail_duplicate_me() nounwind {
entry:
@@ -153,19 +153,16 @@ bb30:
; an unconditional jump to complete a two-way conditional branch.
; CHECK: c_expand_expr_stmt:
-; CHECK: jmp .LBB3_7
-; CHECK-NEXT: .LBB3_12:
+; CHECK: jmp .LBB3_11
+; CHECK-NEXT: .LBB3_9:
; CHECK-NEXT: movq 8(%rax), %rax
+; CHECK-NEXT: xorb %dl, %dl
; CHECK-NEXT: movb 16(%rax), %al
; CHECK-NEXT: cmpb $16, %al
-; CHECK-NEXT: je .LBB3_6
+; CHECK-NEXT: je .LBB3_11
; CHECK-NEXT: cmpb $23, %al
-; CHECK-NEXT: je .LBB3_6
-; CHECK-NEXT: jmp .LBB3_15
-; CHECK-NEXT: .LBB3_14:
-; CHECK-NEXT: cmpb $23, %bl
-; CHECK-NEXT: jne .LBB3_15
-; CHECK-NEXT: .LBB3_15:
+; CHECK-NEXT: jne .LBB3_14
+; CHECK-NEXT: .LBB3_11:
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll
index 0233139..d3f811c 100644
--- a/test/CodeGen/X86/tailcall-stackalign.ll
+++ b/test/CodeGen/X86/tailcall-stackalign.ll
@@ -19,5 +19,5 @@ define i32 @main(i32 %argc, i8** %argv) {
ret i32 0
}
-; CHECK: call tailcaller
+; CHECK: calll tailcaller
; CHECK-NEXT: subl $12
diff --git a/test/CodeGen/X86/tailcallfp2.ll b/test/CodeGen/X86/tailcallfp2.ll
index 4ec127f..04c4e95 100644
--- a/test/CodeGen/X86/tailcallfp2.ll
+++ b/test/CodeGen/X86/tailcallfp2.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%edx}
+; RUN: llc < %s -march=x86 -tailcallopt | FileCheck %s
declare i32 @putchar(i32)
define fastcc i32 @checktail(i32 %x, i32* %f, i32 %g) nounwind {
+; CHECK: checktail:
%tmp1 = icmp sgt i32 %x, 0
br i1 %tmp1, label %if-then, label %if-else
@@ -10,6 +11,7 @@ if-then:
%fun_ptr = bitcast i32* %f to i32(i32, i32*, i32)*
%arg1 = add i32 %x, -1
call i32 @putchar(i32 90)
+; CHECK: jmpl *%e{{.*}}
%res = tail call fastcc i32 %fun_ptr( i32 %arg1, i32 * %f, i32 %g)
ret i32 %res
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
index 214146f..7d08df8 100644
--- a/test/CodeGen/X86/tls9.ll
+++ b/test/CodeGen/X86/tls9.ll
@@ -5,7 +5,7 @@
@i = external hidden thread_local global i32
-define i32 @f() {
+define i32 @f() nounwind {
entry:
%tmp1 = load i32* @i
ret i32 %tmp1
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
index 4c37225..6f6d6f2 100644
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -3,7 +3,7 @@
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
-define i32 @main() nounwind {
+define i32 @foo() nounwind {
bb1.thread:
br label %bb1
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
new file mode 100644
index 0000000..d9f753c
--- /dev/null
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
+; by the compiler_rt implementation of __floatundisf.
+; <rdar://problem/8493982>
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: testq %rdi, %rdi
+; CHECK-NEXT: jns LBB0_2
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
+; CHECK: LBB0_2
+; CHECK-NEXT: cvtsi2ss
+define float @test(i64 %a) {
+entry:
+ %b = uitofp i64 %a to float
+ ret float %b
+}
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index a99af06..6a493c0 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -13,7 +13,7 @@ entry:
bb:
%String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
-; I386: call {{_?}}memcpy
+; I386: calll {{_?}}memcpy
; CORE2: movabsq
; CORE2: movabsq
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 291fc04..471cc16 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -1,15 +1,16 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep psllq %t | grep 32
+; RUN: grep shll %t | grep 12
; RUN: grep pslldq %t | grep 12
; RUN: grep psrldq %t | grep 8
; RUN: grep psrldq %t | grep 12
+; There are no MMX operations in @t1
-define void @t1(i32 %a, <1 x i64>* %P) nounwind {
+define void @t1(i32 %a, x86_mmx* %P) nounwind {
%tmp12 = shl i32 %a, 12
%tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
%tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
- %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
- store <1 x i64> %tmp23, <1 x i64>* %P
+ %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
+ store x86_mmx %tmp23, x86_mmx* %P
ret void
}
diff --git a/test/CodeGen/X86/vec_insert-7.ll b/test/CodeGen/X86/vec_insert-7.ll
index 9ede10f..268b5c4 100644
--- a/test/CodeGen/X86/vec_insert-7.ll
+++ b/test/CodeGen/X86/vec_insert-7.ll
@@ -1,8 +1,15 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx -mtriple=i686-apple-darwin9 -o - | grep punpckldq
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse42 -mtriple=i686-apple-darwin9 | FileCheck %s
+; MMX insertelement is not available; these are promoted to XMM.
+; (Without SSE they are split to two ints, and the code is much better.)
-define <2 x i32> @mmx_movzl(<2 x i32> %x) nounwind {
+define x86_mmx @mmx_movzl(x86_mmx %x) nounwind {
entry:
- %tmp3 = insertelement <2 x i32> %x, i32 32, i32 0 ; <<2 x i32>> [#uses=1]
+; CHECK: mmx_movzl
+; CHECK: pinsrd
+; CHECK: pinsrd
+ %tmp = bitcast x86_mmx %x to <2 x i32>
+ %tmp3 = insertelement <2 x i32> %tmp, i32 32, i32 0 ; <<2 x i32>> [#uses=1]
%tmp8 = insertelement <2 x i32> %tmp3, i32 0, i32 1 ; <<2 x i32>> [#uses=1]
- ret <2 x i32> %tmp8
+ %tmp9 = bitcast <2 x i32> %tmp8 to x86_mmx
+ ret x86_mmx %tmp9
}
diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll
index 2e829df..e5a7ccc 100644
--- a/test/CodeGen/X86/vec_insert-9.ll
+++ b/test/CodeGen/X86/vec_insert-9.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
-; RUN: grep pinsrd %t | count 2
+; RUN: grep pinsrd %t | count 1
define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_set-F.ll b/test/CodeGen/X86/vec_set-F.ll
index 4f0acb2..6dd3cb0 100644
--- a/test/CodeGen/X86/vec_set-F.ll
+++ b/test/CodeGen/X86/vec_set-F.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep mov | count 3
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movq
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep movsd
+; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 | grep mov | count 3
define <2 x i64> @t1(<2 x i64>* %ptr) nounwind {
%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll
deleted file mode 100644
index a63e386..0000000
--- a/test/CodeGen/X86/vec_shuffle-10.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep unpcklps %t | count 1
-; RUN: grep pshufd %t | count 1
-; RUN: not grep {sub.*esp} %t
-
-define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) {
- %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]
- %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2]
- %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
- %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
- %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
- %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1]
- %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp13, <4 x float>* %res
- ret void
-}
-
-define void @test2(<4 x float> %X, <4 x float>* %res) {
- %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp5, <4 x float>* %res
- ret void
-}
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
deleted file mode 100644
index f4930b0..0000000
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movlhps %t | count 1
-; RUN: grep movhlps %t | count 1
-
-define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
- %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
- %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2]
- %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
- %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
- %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp27
-}
-
-define <4 x float> @movhl(<4 x float>* %x, <4 x float>* %y) {
-entry:
- %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
- %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp4
-}
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
new file mode 100644
index 0000000..b090930
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
+
+define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
+entry:
+; CHECK: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: movlps (%rax), %xmm1
+; CHECK-NEXT: shufps $36, %xmm1, %xmm0
+ %0 = load <4 x i32>* undef, align 16
+ %1 = load <4 x i32>* %a0, align 16
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x i32> %2
+}
+
+define void @t01(double* %a0) nounwind ssp {
+entry:
+; CHECK_O0: movsd (%eax), %xmm0
+; CHECK_O0: unpcklpd %xmm0, %xmm0
+ %tmp93 = load double* %a0, align 8
+ %vecinit94 = insertelement <2 x double> undef, double %tmp93, i32 1
+ store <2 x double> %vecinit94, <2 x double>* undef
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll
deleted file mode 100644
index 829fedf..0000000
--- a/test/CodeGen/X86/vec_shuffle-4.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep shuf %t | count 2
-; RUN: not grep unpck %t
-
-define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) {
- %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
- %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1]
- %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp11, <4 x float>* %res
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
deleted file mode 100644
index c24167a..0000000
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movhlps %t | count 1
-; RUN: grep shufps %t | count 1
-
-define void @test() nounwind {
- %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp4, <4 x float>* null
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll
deleted file mode 100644
index 28fd59b..0000000
--- a/test/CodeGen/X86/vec_shuffle-6.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movapd %t | count 1
-; RUN: grep movaps %t | count 1
-; RUN: grep movups %t | count 2
-
-target triple = "i686-apple-darwin"
-@x = external global [4 x i32]
-
-define <2 x i64> @test1() {
- %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
- %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
- %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
- %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
- %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1]
- %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %tmp16
-}
-
-define <4 x float> @test2(i32 %dummy, float %a, float %b, float %c, float %d) {
- %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp13
-}
-
-define <4 x float> @test3(float %a, float %b, float %c, float %d) {
- %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp13
-}
-
-define <2 x double> @test4(double %a, double %b) {
- %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
- %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
- ret <2 x double> %tmp7
-}
diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll
deleted file mode 100644
index 64bd6a3..0000000
--- a/test/CodeGen/X86/vec_shuffle-7.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep pxor %t | count 1
-; RUN: not grep shufps %t
-
-define void @test() {
- bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1]
- shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
- store <4 x float> %2, <4 x float>* null
- unreachable
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll
deleted file mode 100644
index 964ce7b..0000000
--- a/test/CodeGen/X86/vec_shuffle-8.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | \
-; RUN: not grep shufps
-
-define void @test(<4 x float>* %res, <4 x float>* %A) {
- %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1]
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp2, <4 x float>* %res
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll
deleted file mode 100644
index 0719586..0000000
--- a/test/CodeGen/X86/vec_shuffle-9.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
-
-define <4 x i32> @test(i8** %ptr) {
-; CHECK: pxor
-; CHECK: punpcklbw
-; CHECK: punpcklwd
-
- %tmp = load i8** %ptr ; <i8*> [#uses=1]
- %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
- %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1]
- %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
- %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
- %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
- %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1]
- %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
- %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1]
- %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
- %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %tmp36
-}
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 3b15d4c..8aa5094 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 2
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 2
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
+; 64-bit stores here do not use MMX.
@M1 = external global <1 x i64>
@M2 = external global <2 x i32>
diff --git a/test/CodeGen/X86/widen_select-1.ll b/test/CodeGen/X86/widen_select-1.ll
index d9de892..061e33f 100644
--- a/test/CodeGen/X86/widen_select-1.ll
+++ b/test/CodeGen/X86/widen_select-1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
-; CHECK: jne
+; CHECK: je
; widening select v6i32 and then a sub
diff --git a/test/CodeGen/X86/win64_params.ll b/test/CodeGen/X86/win64_params.ll
new file mode 100644
index 0000000..0b67368
--- /dev/null
+++ b/test/CodeGen/X86/win64_params.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; Verify that the 5th and 6th parameters are coming from the correct location
+; on the stack.
+define i32 @f6(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5, i32 %p6) nounwind readnone optsize {
+entry:
+; CHECK: movl 80(%rsp), %eax
+; CHECK: addl 72(%rsp), %eax
+ %add = add nsw i32 %p6, %p5
+ ret i32 %add
+}
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
new file mode 100644
index 0000000..072f36a
--- /dev/null
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+; Verify that the var arg parameters which are passed in registers are stored
+; in home stack slots allocated by the caller and that AP is correctly
+; calculated.
+define void @average_va(i32 %count, ...) nounwind {
+entry:
+; CHECK: subq $40, %rsp
+; CHECK: movq %r9, 72(%rsp)
+; CHECK: movq %r8, 64(%rsp)
+; CHECK: movq %rdx, 56(%rsp)
+; CHECK: leaq 56(%rsp), %rax
+
+ %ap = alloca i8*, align 8 ; <i8**> [#uses=1]
+ %ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
+ call void @llvm.va_start(i8* %ap1)
+ ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
new file mode 100644
index 0000000..a377a7d
--- /dev/null
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN_X64
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; Windows and mingw require a prologue helper routine if more than 4096 bytes area
+; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca
+; and the 32-bit version of __chkstk will probe the stack and adjust the stack pointer.
+; The 64-bit version of __chkstk is only responsible for probing the stack. The 64-bit
+; prologue is responsible for adjusting the stack pointer.
+
+; Stack allocation >= 4096 bytes will require call to __chkstk in the Windows ABI.
+define i32 @main4k() nounwind {
+entry:
+; WIN_X32: calll __chkstk
+; WIN_X64: callq __chkstk
+; MINGW_X32: calll __alloca
+; MINGW_X64: callq _alloca
+; LINUX-NOT: call __chkstk
+ %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0]
+ ret i32 0
+}
+
+; Make sure we don't call __chkstk or __alloca when we have less than a 4096 stack
+; allocation.
+define i32 @main128() nounwind {
+entry:
+; WIN_X32: # BB#0:
+; WIN_X32-NOT: calll __chkstk
+; WIN_X32: ret
+
+; WIN_X64: # BB#0:
+; WIN_X64-NOT: callq __chkstk
+; WIN_X64: ret
+
+; MINGW_X64: # BB#0:
+; MINGW_X64-NOT: callq _alloca
+; MINGW_X64: ret
+
+; LINUX: # BB#0:
+; LINUX-NOT: call __chkstk
+; LINUX: ret
+ %array128 = alloca [128 x i8], align 16 ; <[128 x i8]*> [#uses=0]
+ ret i32 0
+}