diff options
author | Victor Umansky <victor.umansky@intel.com> | 2012-01-05 08:46:19 +0000 |
---|---|---|
committer | Victor Umansky <victor.umansky@intel.com> | 2012-01-05 08:46:19 +0000 |
commit | 19d8559019b75edfd7f5b05ffa266bc278127854 (patch) | |
tree | 817bc1818c4bca5350ec8156caebfdeefe21a334 /test | |
parent | 30c90c973ab7b08df2a1eabf6bde5c6af5cda4a2 (diff) | |
download | external_llvm-19d8559019b75edfd7f5b05ffa266bc278127854.zip external_llvm-19d8559019b75edfd7f5b05ffa266bc278127854.tar.gz external_llvm-19d8559019b75edfd7f5b05ffa266bc278127854.tar.bz2 |
Peephole optimization of ptest-conditioned branches on the X86 architecture. Combines the instruction sequences generated for the ptestz/ptestc intrinsics into a ptest+jcc pair, for both SSE and AVX.
Testing: passed 'make check' including LIT tests for all sequences being handled (both SSE and AVX)
Reviewers: Evan Cheng, David Blaikie, Bruno Lopes, Elena Demikhovsky, Chad Rosier, Anton Korobeynikov
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147601 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
-rwxr-xr-x | test/CodeGen/X86/avx-brcond.ll | 244 | ||||
-rw-r--r-- | test/CodeGen/X86/brcond.ll | 246 |
2 files changed, 489 insertions, 1 deletions
diff --git a/test/CodeGen/X86/avx-brcond.ll b/test/CodeGen/X86/avx-brcond.ll new file mode 100755 index 0000000..cecacc2 --- /dev/null +++ b/test/CodeGen/X86/avx-brcond.ll @@ -0,0 +1,244 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +declare i32 @llvm.x86.avx.ptestz.256(<4 x i64> %p1, <4 x i64> %p2) nounwind +declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind + +define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test1: +; CHECK: vptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test2(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test2: +; CHECK: vptest +; CHECK-NEXT: je +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp eq i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test3: +; CHECK: vptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = trunc i32 
%res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test4: +; CHECK: vptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test5(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test5: +; CHECK: vptest +; CHECK-NEXT: jb +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp eq i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test6: +; CHECK: vptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, 
label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test7: +; CHECK: vptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp eq i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test8: +; CHECK: vptest +; CHECK-NEXT: je +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp ne i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test9(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test9: +; CHECK: vptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp eq i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + 
+bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test10(<4 x i64> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test10: +; CHECK: vptest +; CHECK-NEXT: jb +; CHECK: ret + + %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind + %one = icmp ne i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll index 5cdc100..ff3b2e4 100644 --- a/test/CodeGen/X86/brcond.ll +++ b/test/CodeGen/X86/brcond.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=penryn | FileCheck %s + ; rdar://7475489 define i32 @test1(i32 %a, i32 %b) nounwind ssp { @@ -106,3 +107,246 @@ bb2: ; preds = %entry, %bb1 %.0 = fptrunc double %.0.in to float ; <float> [#uses=1] ret float %.0 } + +declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind +declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind + +define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test5: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + 
+bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test6(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test6: +; CHECK: ptest +; CHECK-NEXT: je +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test7: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test8: +; CHECK: ptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = 
fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test9(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test9: +; CHECK: ptest +; CHECK-NEXT: jb +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 0 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test10: +; CHECK: ptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = trunc i32 %res to i1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test11: +; CHECK: ptest +; CHECK-NEXT: jne +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x 
float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test12: +; CHECK: ptest +; CHECK-NEXT: je +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test13(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test13: +; CHECK: ptest +; CHECK-NEXT: jae +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp eq i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} + +define <4 x float> @test14(<4 x float> %a, <4 x float> %b) nounwind { +entry: +; CHECK: test14: +; CHECK: ptest +; CHECK-NEXT: jb +; CHECK: ret + + %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind + %one = icmp ne i32 %res, 1 + br i1 %one, label %bb1, label %bb2 + +bb1: + %c = fadd <4 x float> %b, < 
float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +bb2: + %d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 > + br label %return + +return: + %e = phi <4 x float> [%c, %bb1], [%d, %bb2] + ret <4 x float> %e +} |