diff options
| author | Juergen Ributzka <juergen@apple.com> | 2013-07-16 18:20:45 +0000 |
|---|---|---|
| committer | Juergen Ributzka <juergen@apple.com> | 2013-07-16 18:20:45 +0000 |
| commit | b95e0f6f2f43d2c9ae8dd9407f9216d02fa4c833 (patch) | |
| tree | a428edd0ee9d8bb124bd75f325251eab59215d37 /test | |
| parent | 71981ef040dd94438449aeca726cab5839d8ec3c (diff) | |
| download | external_llvm-b95e0f6f2f43d2c9ae8dd9407f9216d02fa4c833.zip external_llvm-b95e0f6f2f43d2c9ae8dd9407f9216d02fa4c833.tar.gz external_llvm-b95e0f6f2f43d2c9ae8dd9407f9216d02fa4c833.tar.bz2 | |
[X86] Use min/max to optimize unsigned vector comparison on X86
Use PMIN/PMAX for UGE/ULE vector comparisons to reduce the number of required
instructions. This trick also works for UGT/ULT, but there is no advantage in
doing so. It wouldn't reduce the number of instructions and it would actually
reduce performance.
Reviewer: Ben
radar:5972691
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186432 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test')
| -rw-r--r-- | test/CodeGen/X86/vec_setcc.ll | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/test/CodeGen/X86/vec_setcc.ll b/test/CodeGen/X86/vec_setcc.ll
new file mode 100644
index 0000000..b1bf52d
--- /dev/null
+++ b/test/CodeGen/X86/vec_setcc.ll
@@ -0,0 +1,144 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=sse2 | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=sse41 | FileCheck %s -check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=avx | FileCheck %s -check-prefix=AVX
+
+; Verify the lowering of unsigned vector comparisons (icmp uge/ule followed by
+; sext) for <16 x i8>, <8 x i16> and <4 x i32>. Where the checks expect a
+; pmaxu*/pminu* instruction, the comparison is expected to be emitted as
+;   a uge b  ->  pmaxu(a, b) == a
+;   a ule b  ->  pminu(a, b) == a
+; The triple is pinned because the label checks use Darwin's '_' symbol prefix.
+
+; uge on bytes: pmaxub + pcmpeqb expected for all three feature sets.
+define <16 x i8> @v16i8_icmp_uge(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
+  %1 = icmp uge <16 x i8> %a, %b
+  %2 = sext <16 x i1> %1 to <16 x i8>
+  ret <16 x i8> %2
+; SSE2: _v16i8_icmp_uge:
+; SSE2: pmaxub %xmm0, %xmm1
+; SSE2: pcmpeqb %xmm1, %xmm0
+
+; SSE41: _v16i8_icmp_uge:
+; SSE41: pmaxub %xmm0, %xmm1
+; SSE41: pcmpeqb %xmm1, %xmm0
+
+; AVX: _v16i8_icmp_uge:
+; AVX: vpmaxub %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
+}
+
+; ule on bytes: pminub + pcmpeqb expected for all three feature sets.
+define <16 x i8> @v16i8_icmp_ule(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
+  %1 = icmp ule <16 x i8> %a, %b
+  %2 = sext <16 x i1> %1 to <16 x i8>
+  ret <16 x i8> %2
+; SSE2: _v16i8_icmp_ule:
+; SSE2: pminub %xmm0, %xmm1
+; SSE2: pcmpeqb %xmm1, %xmm0
+
+; SSE41: _v16i8_icmp_ule:
+; SSE41: pminub %xmm0, %xmm1
+; SSE41: pcmpeqb %xmm1, %xmm0
+
+; AVX: _v16i8_icmp_ule:
+; AVX: vpminub %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
+}
+
+
+; uge on words: pmaxuw is only expected under SSE4.1/AVX; the SSE2 checks
+; expect a constant load, pxor of both operands with it, pcmpgtw, and a
+; final pcmpeqd/pxor pair.
+define <8 x i16> @v8i16_icmp_uge(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
+  %1 = icmp uge <8 x i16> %a, %b
+  %2 = sext <8 x i1> %1 to <8 x i16>
+  ret <8 x i16> %2
+; SSE2: _v8i16_icmp_uge:
+; SSE2: movdqa LCPI2_0(%rip), %xmm2
+; SSE2: pxor %xmm2, %xmm0
+; SSE2: pxor %xmm1, %xmm2
+; SSE2: pcmpgtw %xmm0, %xmm2
+; SSE2: pcmpeqd %xmm0, %xmm0
+; SSE2: pxor %xmm2, %xmm0
+
+; SSE41: _v8i16_icmp_uge:
+; SSE41: pmaxuw %xmm0, %xmm1
+; SSE41: pcmpeqw %xmm1, %xmm0
+
+; AVX: _v8i16_icmp_uge:
+; AVX: vpmaxuw %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
+}
+
+; ule on words: pminuw is only expected under SSE4.1/AVX; the SSE2 checks
+; expect the longer pxor/pcmpgtw sequence plus a trailing register move.
+define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
+  %1 = icmp ule <8 x i16> %a, %b
+  %2 = sext <8 x i1> %1 to <8 x i16>
+  ret <8 x i16> %2
+; SSE2: _v8i16_icmp_ule:
+; SSE2: movdqa LCPI3_0(%rip), %xmm2
+; SSE2: pxor %xmm2, %xmm1
+; SSE2: pxor %xmm2, %xmm0
+; SSE2: pcmpgtw %xmm1, %xmm0
+; SSE2: pcmpeqd %xmm1, %xmm1
+; SSE2: pxor %xmm0, %xmm1
+; SSE2: movdqa %xmm1, %xmm0
+
+; SSE41: _v8i16_icmp_ule:
+; SSE41: pminuw %xmm0, %xmm1
+; SSE41: pcmpeqw %xmm1, %xmm0
+
+; AVX: _v8i16_icmp_ule:
+; AVX: vpminuw %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
+}
+
+
+; uge on dwords: pmaxud is only expected under SSE4.1/AVX; the SSE2 checks
+; expect the longer pxor/pcmpgtd sequence.
+define <4 x i32> @v4i32_icmp_uge(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
+  %1 = icmp uge <4 x i32> %a, %b
+  %2 = sext <4 x i1> %1 to <4 x i32>
+  ret <4 x i32> %2
+; SSE2: _v4i32_icmp_uge:
+; SSE2: movdqa LCPI4_0(%rip), %xmm2
+; SSE2: pxor %xmm2, %xmm0
+; SSE2: pxor %xmm1, %xmm2
+; SSE2: pcmpgtd %xmm0, %xmm2
+; SSE2: pcmpeqd %xmm0, %xmm0
+; SSE2: pxor %xmm2, %xmm0
+
+; SSE41: _v4i32_icmp_uge:
+; SSE41: pmaxud %xmm0, %xmm1
+; SSE41: pcmpeqd %xmm1, %xmm0
+
+; AVX: _v4i32_icmp_uge:
+; AVX: vpmaxud %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
+}
+
+; ule on dwords: pminud is only expected under SSE4.1/AVX; the SSE2 checks
+; expect the longer pxor/pcmpgtd sequence plus a trailing register move.
+define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
+  %1 = icmp ule <4 x i32> %a, %b
+  %2 = sext <4 x i1> %1 to <4 x i32>
+  ret <4 x i32> %2
+; SSE2: _v4i32_icmp_ule:
+; SSE2: movdqa LCPI5_0(%rip), %xmm2
+; SSE2: pxor %xmm2, %xmm1
+; SSE2: pxor %xmm2, %xmm0
+; SSE2: pcmpgtd %xmm1, %xmm0
+; SSE2: pcmpeqd %xmm1, %xmm1
+; SSE2: pxor %xmm0, %xmm1
+; SSE2: movdqa %xmm1, %xmm0
+
+; SSE41: _v4i32_icmp_ule:
+; SSE41: pminud %xmm0, %xmm1
+; SSE41: pcmpeqd %xmm1, %xmm0
+
+; AVX: _v4i32_icmp_ule:
+; AVX: vpminud %xmm1, %xmm0, %xmm1
+; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
+}
+
