author | Craig Topper <craig.topper@gmail.com> | 2013-08-16 06:07:34 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2013-08-16 06:07:34 +0000 |
commit | 0163356ad1944dda162956993a95e547dc03251b (patch) | |
tree | 859346547558403c894ae107c9665416ce424ced /lib/Target/X86 | |
parent | 7f92c7b52c47adcf4bc71e7c2f3603fe745a0e7a (diff) | |
download | external_llvm-0163356ad1944dda162956993a95e547dc03251b.zip external_llvm-0163356ad1944dda162956993a95e547dc03251b.tar.gz external_llvm-0163356ad1944dda162956993a95e547dc03251b.tar.bz2 |
Don't use v16i32 for load pattern matching. All 512-bit loads are cast to v8i64.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188534 91177308-0d34-0410-b5e6-96231b3b80d8
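
In DAG terms: as the commit message notes, 512-bit integer vector loads are canonicalized to v8i64 on this target, so a v16i32 load reaches instruction selection as (v16i32 (bitconvert (v8i64 (load ...)))) and a pattern that matches (v16i32 (load ...)) directly can never fire. The fix is to match the canonical v8i64 load and wrap it in a bitconvert fragment. A minimal sketch of how the two fragments compose (the expanded pattern shown in the comment is illustrative, not verbatim tblgen output):

    // The two fragments involved, as defined in X86InstrFragmentsSIMD.td:
    def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>;
    def bc_v16i32  : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;

    // With bc_frag = bc_v16i32 and ld_frag = memopv8i64, the VMOVDQU32 "rm"
    // def produced by the multiclass in the diff effectively carries
    //   [(set VR512:$dst, (v16i32 (bitconvert (v8i64 (memop addr:$src)))))]
    // which is exactly the post-legalization DAG shape for a v16i32 load.
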
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 14 |
-rw-r--r-- | lib/Target/X86/X86InstrFragmentsSIMD.td | 10 |
2 files changed, 12 insertions, 12 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index f4528a9..d100e88 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -555,7 +555,7 @@ let Constraints = "$src1 = $dst" in {
                     (bitconvert (mem_frag addr:$src3)))))]>, EVEX_4V;
   }
 }
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
+defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv8i64, i512mem,
                   v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
 defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
                   v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -1107,7 +1107,7 @@ def VMOVDQA64rm : AVX512BI<0x6F, MRMSrcMem, (outs VR512:$dst),
 }
 
 multiclass avx512_mov_int<bits<8> opc, string asm, RegisterClass RC,
-                          RegisterClass KRC,
+                          RegisterClass KRC, PatFrag bc_frag,
                           PatFrag ld_frag, X86MemOperand x86memop> {
 let neverHasSideEffects = 1 in
   def rr : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
@@ -1116,7 +1116,7 @@ let neverHasSideEffects = 1 in
 let canFoldAsLoad = 1 in
   def rm : AVX512XSI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
-             [(set RC:$dst, (ld_frag addr:$src))]>,
+             [(set RC:$dst, (bc_frag (ld_frag addr:$src)))]>,
              EVEX;
 let Constraints = "$src1 = $dst" in {
   def rrk : AVX512XSI<opc, MRMSrcReg, (outs RC:$dst),
@@ -1132,10 +1132,10 @@ let Constraints = "$src1 = $dst" in {
   }
 }
 
-defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, memopv16i32, i512mem>,
-                 EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, memopv8i64, i512mem>,
-                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VMOVDQU32 : avx512_mov_int<0x6F, "vmovdqu32", VR512, VK16WM, bc_v16i32,
+                                memopv8i64, i512mem>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VMOVDQU64 : avx512_mov_int<0x6F, "vmovdqu64", VR512, VK8WM, bc_v8i64,
+                                memopv8i64, i512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
 
 let AddedComplexity = 20 in {
 def : Pat<(v16f32 (vselect VK16WM:$mask, (v16f32 VR512:$src1),
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 3d6370f..fe35393 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -277,7 +277,6 @@ def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
 // 512-bit load pattern fragments
 def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
 def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
-def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
 def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
 
 // 128-/256-/512-bit extload pattern fragments
@@ -351,8 +350,6 @@ def alignedloadv16f32 : PatFrag<(ops node:$ptr),
                                 (v16f32 (alignedload512 node:$ptr))>;
 def alignedloadv8f64 : PatFrag<(ops node:$ptr),
                                (v8f64 (alignedload512 node:$ptr))>;
-def alignedloadv16i32 : PatFrag<(ops node:$ptr),
-                                (v16i32 (alignedload512 node:$ptr))>;
 def alignedloadv8i64 : PatFrag<(ops node:$ptr),
                                (v8i64 (alignedload512 node:$ptr))>;
 
@@ -379,14 +376,12 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
 // 256-bit memop pattern fragments
 // NOTE: all 256-bit integer vector loads are promoted to v4i64
 def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
-def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
 def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
 def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
 
 // 512-bit memop pattern fragments
 def memopv16f32 : PatFrag<(ops node:$ptr), (v16f32 (memop node:$ptr))>;
 def memopv8f64 : PatFrag<(ops node:$ptr), (v8f64 (memop node:$ptr))>;
-def memopv16i32 : PatFrag<(ops node:$ptr), (v16i32 (memop node:$ptr))>;
 def memopv8i64 : PatFrag<(ops node:$ptr), (v8i64 (memop node:$ptr))>;
 
 // SSSE3 uses MMX registers for some instructions. They aren't aligned on a
@@ -438,6 +433,11 @@ def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
 def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
 def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
 
+// 512-bit bitconvert pattern fragments
+def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
+def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
+
+
 def vzmovl_v2i64 : PatFrag<(ops node:$src),
                            (bitconvert (v2i64 (X86vzmovl
                              (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
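
For a standalone selection pattern written outside the multiclass, the same idiom would look roughly like the sketch below; FOOZrm is a hypothetical instruction record used only to show the shape, not something this commit defines:

    // Hypothetical standalone pattern using the new 512-bit fragments:
    // select a v16i32 memory operand via the canonical v8i64 load.
    def : Pat<(v16i32 (bitconvert (memopv8i64 addr:$src))),
              (FOOZrm addr:$src)>;

This mirrors the established 256-bit convention recorded in the NOTE above: integer vector loads are promoted to a single canonical element width, and narrower-element patterns reach the load through a bitconvert fragment rather than a per-type load fragment.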