From 40307c7dbe2d104784763c28697d7926793674af Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 29 Jun 2012 00:54:20 +0000 Subject: X86: add more GATHER intrinsics in LLVM Corrected type for index of llvm.x86.avx2.gather.d.pd.256 from 256-bit to 128-bit. Corrected types for src|dst|mask of llvm.x86.avx2.gather.q.ps.256 from 256-bit to 128-bit. Support the following intrinsics: llvm.x86.avx2.gather.d.q, llvm.x86.avx2.gather.q.q llvm.x86.avx2.gather.d.q.256, llvm.x86.avx2.gather.q.q.256 llvm.x86.avx2.gather.d.d, llvm.x86.avx2.gather.q.d llvm.x86.avx2.gather.d.d.256, llvm.x86.avx2.gather.q.d.256 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159402 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx2-intrinsics-x86.ll | 100 +++++++++++++++++++++++++++--- test/MC/Disassembler/X86/simple-tests.txt | 20 +++++- test/MC/X86/x86_64-avx-encoding.s | 28 ++++++++- 3 files changed, 135 insertions(+), 13 deletions(-) (limited to 'test') diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 3fb3497..459dbb2 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -988,14 +988,14 @@ declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, - <8 x i32> %idx, <4 x double> %mask) { + <4 x i32> %idx, <4 x double> %mask) { ; CHECK: vgatherdpd %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, - i8* %a1, <8 x i32> %idx, <4 x double> %mask, i8 2) ; + i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ; ret <4 x double> %res } declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, - <8 x i32>, <4 x double>, i8) nounwind readonly + <4 x i32>, <4 x double>, i8) nounwind readonly define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, <2 x i64> %idx, <2 x double> %mask) { @@ -1047,12 +1047,92 @@ define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly -define <8 x float> @test_x86_avx2_gather_q_ps_256(<8 x float> %a0, i8* %a1, - <4 x i64> %idx, <8 x float> %mask) { +define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, + <4 x i64> %idx, <4 x float> %mask) { ; CHECK: vgatherqps - %res = call <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float> %a0, - i8* %a1, <4 x i64> %idx, <8 x float> %mask, i8 2) ; - ret <8 x float> %res + %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, + i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ; + ret <4 x float> %res +} +declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, + <4 x i64>, <4 x float>, i8) nounwind readonly + +define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, + <4 x i32> %idx, <2 x i64> %mask) { + ; CHECK: vpgatherdq + %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, + i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, + <4 x i32>, <2 x i64>, i8) nounwind readonly + +define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, + <4 x i32> %idx, <4 x i64> %mask) { + ; CHECK: vpgatherdq + %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, + i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, + <4 x i32>, <4 x i64>, i8) nounwind readonly + +define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, + <2 x i64> %idx, <2 x i64> %mask) { + ; CHECK: vpgatherqq + %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, + i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ; + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, + <2 x i64>, <2 x i64>, i8) nounwind readonly + +define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, + <4 x i64> %idx, <4 x i64> %mask) { + ; CHECK: vpgatherqq + %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, + i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ; + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, + <4 x i64>, <4 x i64>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, + <4 x i32> %idx, <4 x i32> %mask) { + ; CHECK: vpgatherdd + %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, + i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, + <4 x i32>, <4 x i32>, i8) nounwind readonly + +define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, + <8 x i32> %idx, <8 x i32> %mask) { + ; CHECK: vpgatherdd + %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, + i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ; + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, + <8 x i32>, <8 x i32>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, + <2 x i64> %idx, <4 x i32> %mask) { + ; CHECK: vpgatherqd + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, + i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, + <2 x i64>, <4 x i32>, i8) nounwind readonly + +define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, + <4 x i64> %idx, <4 x i32> %mask) { + ; CHECK: vpgatherqd + %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, + i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ; + ret <4 x i32> %res } -declare <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float>, i8*, - <4 x i64>, <8 x float>, i8) nounwind readonly +declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, + <4 x i64>, <4 x i32>, i8) nounwind readonly diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt index c543e46..712c95a 100644 --- a/test/MC/Disassembler/X86/simple-tests.txt +++ b/test/MC/Disassembler/X86/simple-tests.txt @@ -728,9 +728,27 @@ # CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2 0xc4 0xe2 0xe9 0x92 0x04 0x4f -# CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10 +# CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 +0xc4 0xe2 0xed 0x92 0x04 0x4f + +# CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 +0xc4 0x02 0x29 0x93 0x04 0x4f + +# CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 0xc4 0x02 0x2d 0x93 0x04 0x4f +# CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 +0xc4 0xe2 0xe9 0x90 0x04 0x4f + +# CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 +0xc4 0xe2 0xed 0x90 0x04 0x4f + +# CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10 +0xc4 0x02 0x29 0x91 0x04 0x4f + +# CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 +0xc4 0x02 0x2d 0x91 0x04 0x4f + # rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling # CHECK: lock # CHECK-NEXT: xaddq %rcx, %rbx diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s index 77c0ff3..b9943ba 100644 --- a/test/MC/X86/x86_64-avx-encoding.s +++ b/test/MC/X86/x86_64-avx-encoding.s @@ -4126,6 +4126,30 @@ _foo2: // CHECK: encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f] vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2 -// CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10 +// CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f] + vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2 + +// CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x29,0x93,0x04,0x4f] + vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10 + +// CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 // CHECK: encoding: [0xc4,0x02,0x2d,0x93,0x04,0x4f] - vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10 + vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10 + +// CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 +// CHECK: encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f] + vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2 + +// CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 +// CHECK: encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f] + vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2 + +// CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x29,0x91,0x04,0x4f] + vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10 + +// CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 +// CHECK: encoding: [0xc4,0x02,0x2d,0x91,0x04,0x4f] + vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10 -- cgit v1.1