diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-10-22 09:19:28 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2013-10-22 09:19:28 +0000 |
commit | ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d (patch) | |
tree | 71244718d2df517eb48a0908ef8b22ef5e4522b7 | |
parent | 3ebe47ee13fa29f1fdcb74f82ca42770e101b40e (diff) | |
download | external_llvm-ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d.zip external_llvm-ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d.tar.gz external_llvm-ea79feb1a87af1e0e9c0fd3bf8831c4593b56d4d.tar.bz2 |
AVX-512: aligned / unaligned load and store for 512-bit integer vectors.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193156 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 65 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 1 | ||||
-rw-r--r-- | test/CodeGen/X86/avx512-mov.ll | 28 |
3 files changed, 64 insertions(+), 30 deletions(-)
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 05e346d..8cf5bb4 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1067,23 +1067,6 @@ def VMOVUPDZmr : AVX512PI<0x11, MRMDestMem, (outs), (ins f512mem:$dst, VR512:$sr SSEPackedDouble>, EVEX, EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>; -// Use vmovaps/vmovups for AVX-512 integer load/store. -// 512-bit load/store -def : Pat<(alignedloadv8i64 addr:$src), - (VMOVAPSZrm addr:$src)>; -def : Pat<(loadv8i64 addr:$src), - (VMOVUPSZrm addr:$src)>; - -def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst), - (VMOVAPSZmr addr:$dst, VR512:$src)>; -def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst), - (VMOVAPSZmr addr:$dst, VR512:$src)>; - -def : Pat<(store (v8i64 VR512:$src), addr:$dst), - (VMOVUPDZmr addr:$dst, VR512:$src)>; -def : Pat<(store (v16i32 VR512:$src), addr:$dst), - (VMOVUPSZmr addr:$dst, VR512:$src)>; - let neverHasSideEffects = 1 in { def VMOVDQA32rr : AVX512BI<0x6F, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), @@ -1115,25 +1098,36 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst), } } -multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC, - RegisterClass KRC, +// 512-bit aligned load/store +def : Pat<(alignedloadv8i64 addr:$src), (VMOVDQA64rm addr:$src)>; +def : Pat<(alignedloadv16i32 addr:$src), (VMOVDQA32rm addr:$src)>; + +def : Pat<(alignedstore512 (v8i64 VR512:$src), addr:$dst), + (VMOVDQA64mr addr:$dst, VR512:$src)>; +def : Pat<(alignedstore512 (v16i32 VR512:$src), addr:$dst), + (VMOVDQA32mr addr:$dst, VR512:$src)>; + +multiclass avx512_mov_int<bits<8> load_opc, bits<8> store_opc, string asm, + RegisterClass RC, RegisterClass KRC, PatFrag ld_frag, X86MemOperand x86memop> { let neverHasSideEffects = 1 in - def rr : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, - EVEX; + def rr : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), 
(ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX; let canFoldAsLoad = 1 in - def rm : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))]>, - EVEX; + def rm : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (ld_frag addr:$src))]>, EVEX; +let mayStore = 1 in + def mr : AVX512XSI<store_opc, MRMDestMem, (outs), + (ins x86memop:$dst, VR512:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), []>, EVEX; let Constraints = "$src1 = $dst" in { - def rrk : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst), + def rrk : AVX512XSI<load_opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, KRC:$mask, RC:$src2), !strconcat(asm, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), []>, EVEX, EVEX_K; - def rmk : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst), + def rmk : AVX512XSI<load_opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, KRC:$mask, x86memop:$src2), !strconcat(asm, "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"), @@ -1141,11 +1135,22 @@ let Constraints = "$src1 = $dst" in { } } -defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>, +defm VMOVDQU32 : avx512_mov_int<0x6F, 0x7F, "vmovdqu32", VR512, VK16WM, + memopv16i32, i512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>, +defm VMOVDQU64 : avx512_mov_int<0x6F, 0x7F, "vmovdqu64", VR512, VK8WM, + memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +// 512-bit unaligned load/store +def : Pat<(loadv8i64 addr:$src), (VMOVDQU64rm addr:$src)>; +def : Pat<(loadv16i32 addr:$src), (VMOVDQU32rm addr:$src)>; + +def : Pat<(store (v8i64 VR512:$src), addr:$dst), + (VMOVDQU64mr addr:$dst, VR512:$src)>; +def : Pat<(store (v16i32 VR512:$src), addr:$dst), + (VMOVDQU32mr addr:$dst, VR512:$src)>; + let AddedComplexity = 20 in { def 
: Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1), (v16f32 VR512:$src2))), diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index c86b512..1fed424 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -289,6 +289,7 @@ def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; // 512-bit load pattern fragments def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>; def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>; +def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>; def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>; // 128-/256-/512-bit extload pattern fragments diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll index 6c5c586..91242b1 100644 --- a/test/CodeGen/X86/avx512-mov.ll +++ b/test/CodeGen/X86/avx512-mov.ll @@ -125,3 +125,31 @@ define <4 x i32> @test15(i32* %x) { %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0 ret <4 x i32>%res } + +; CHECK-LABEL: test16 +; CHECK: vmovdqu32 +; CHECK: ret +define <16 x i32> @test16(i8 * %addr) { + %vaddr = bitcast i8* %addr to <16 x i32>* + %res = load <16 x i32>* %vaddr, align 1 + ret <16 x i32>%res +} + +; CHECK-LABEL: test17 +; CHECK: vmovdqa32 +; CHECK: ret +define <16 x i32> @test17(i8 * %addr) { + %vaddr = bitcast i8* %addr to <16 x i32>* + %res = load <16 x i32>* %vaddr, align 64 + ret <16 x i32>%res +} + +; CHECK-LABEL: test18 +; CHECK: vmovdqa64 +; CHECK: ret +define void @test18(i8 * %addr, <8 x i64> %data) { + %vaddr = bitcast i8* %addr to <8 x i64>* + store <8 x i64>%data, <8 x i64>* %vaddr, align 64 + ret void +} + |