about summary refs log tree commit diff stats
path: root/test/CodeGen/X86/v8i1-masks.ll
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-12-27 08:15:45 +0000
committerNadav Rotem <nrotem@apple.com>2012-12-27 08:15:45 +0000
commitd6fb53adb19ccfbfb1eedec11c899aaa8401d036 (patch)
tree5a44210f5aa0c0130845b720b747cabe76301e34 /test/CodeGen/X86/v8i1-masks.ll
parent3c22a444001ac3f7a89c9888d60aec69352d0e58 (diff)
downloadexternal_llvm-d6fb53adb19ccfbfb1eedec11c899aaa8401d036.zip
external_llvm-d6fb53adb19ccfbfb1eedec11c899aaa8401d036.tar.gz
external_llvm-d6fb53adb19ccfbfb1eedec11c899aaa8401d036.tar.bz2
On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized
register. In most cases we actually compare or select YMM-sized registers and mixing the two types creates horrible code. This commit optimizes some of the transition sequences. PR14657.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171148 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/v8i1-masks.ll')
-rw-r--r-- test/CodeGen/X86/v8i1-masks.ll | 38
1 files changed, 38 insertions, 0 deletions
diff --git a/test/CodeGen/X86/v8i1-masks.ll b/test/CodeGen/X86/v8i1-masks.ll
new file mode 100644
index 0000000..0107999
--- /dev/null
+++ b/test/CodeGen/X86/v8i1-masks.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -o - < %s | FileCheck %s
+
+;CHECK: and_masks
+;CHECK: vmovups
+;CHECK-NEXT: vcmpltp
+;CHECK-NEXT: vandps
+;CHECK-NEXT: vmovups
+;CHECK: ret
+
+define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 16
+ %v1 = load <8 x float>* %b, align 16
+ %m0 = fcmp olt <8 x float> %v1, %v0
+ %v2 = load <8 x float>* %c, align 16
+ %m1 = fcmp olt <8 x float> %v2, %v0
+ %mand = and <8 x i1> %m1, %m0
+ %r = zext <8 x i1> %mand to <8 x i32>
+ store <8 x i32> %r, <8 x i32>* undef, align 16
+ ret void
+}
+
+;CHECK: neg_mask
+;CHECK: vmovups
+;CHECK-NEXT: vcmpltps
+;CHECK-NEXT: vandps
+;CHECK-NEXT: vmovups
+;CHECK: ret
+
+define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
+ %v0 = load <8 x float>* %a, align 16
+ %v1 = load <8 x float>* %b, align 16
+ %m0 = fcmp olt <8 x float> %v1, %v0
+ %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
+ %r = zext <8 x i1> %mand to <8 x i32>
+ store <8 x i32> %r, <8 x i32>* undef, align 16
+ ret void
+}
+