diff options
Diffstat (limited to 'lib/Target/X86/X86InstrAVX512.td')
-rw-r--r-- | lib/Target/X86/X86InstrAVX512.td | 301 |
1 files changed, 234 insertions, 67 deletions
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 37bcc52..41e900e 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -476,6 +476,28 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + RegisterClass KRC> { + let mayLoad = 1 in { + def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), + []>, EVEX; + def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask, + x86memop:$src), + !strconcat(OpcodeStr, + " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), + []>, EVEX, EVEX_KZ; + } +} + +defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", + i128mem, loadv2i64, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VT4>; +defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", + i256mem, loadv4i64, VK16WM>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VT4>; + def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), (VPBROADCASTDZrr VR128X:$src)>; def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), @@ -517,10 +539,12 @@ def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src), []>, EVEX; } +let Predicates = [HasCDI] in { defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512, VK16, v16i32, v16i1>, EVEX_V512; defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512, VK8, v8i64, v8i1>, EVEX_V512, VEX_W; +} //===----------------------------------------------------------------------===// // AVX-512 - VPERM @@ -585,7 +609,7 @@ defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, // -- VPERM2I - 3 source operands form -- multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, ValueType OpVT> { + SDNode OpNode, ValueType OpVT, RegisterClass KRC> { let Constraints = "$src1 = $dst" in { def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3), @@ -595,48 +619,107 @@ let Constraints = "$src1 = $dst" in { (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, EVEX_4V; + def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + " \t{$src3, $src2, $dst {${mask}}|" + "$dst {${mask}}, $src2, $src3}"), + [(set RC:$dst, (OpVT (vselect KRC:$mask, + (OpNode RC:$src1, RC:$src2, + RC:$src3), + RC:$src1)))]>, + EVEX_4V, EVEX_K; + + let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> + def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + " \t{$src3, $src2, $dst {${mask}} {z} |", + "$dst {${mask}} {z}, $src2, $src3}"), + [(set RC:$dst, (OpVT (vselect KRC:$mask, + (OpNode RC:$src1, RC:$src2, + RC:$src3), + (OpVT (bitconvert + (v16i32 immAllZerosV))))))]>, + EVEX_4V, EVEX_KZ; + def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, RC:$src2, x86memop:$src3), !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, - (OpVT (OpNode RC:$src1, RC:$src2, + (OpVT (OpNode RC:$src1, RC:$src2, (mem_frag addr:$src3))))]>, EVEX_4V; + + def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3), + !strconcat(OpcodeStr, + " \t{$src3, $src2, $dst {${mask}}|" + "$dst {${mask}}, $src2, $src3}"), + [(set RC:$dst, + (OpVT (vselect KRC:$mask, + (OpNode RC:$src1, RC:$src2, + (mem_frag addr:$src3)), + RC:$src1)))]>, + EVEX_4V, EVEX_K; + + let AddedComplexity = 10 in // Prefer over the rrkz variant + def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3), + !strconcat(OpcodeStr, + " \t{$src3, $src2, $dst {${mask}} {z}|" + "$dst {${mask}} {z}, $src2, $src3}"), + [(set RC:$dst, + (OpVT (vselect KRC:$mask, + (OpNode RC:$src1, RC:$src2, + (mem_frag addr:$src3)), + (OpVT (bitconvert + (v16i32 immAllZerosV))))))]>, + EVEX_4V, EVEX_KZ; } } -defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem, - X86VPermiv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem, - X86VPermiv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem, - X86VPermiv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem, - X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPERMT2D : avx512_perm_3src<0x7E, "vpermt2d", VR512, memopv16i32, i512mem, - X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_3src<0x7E, "vpermt2q", VR512, memopv8i64, i512mem, - X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512mem, - X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem, - X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -def : Pat<(v16f32 (int_x86_avx512_mask_vpermt_ps_512 (v16i32 VR512:$idx), - (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 -1))), - (VPERMT2PSrr VR512:$src1, VR512:$idx, VR512:$src2)>; - -def : Pat<(v16i32 (int_x86_avx512_mask_vpermt_d_512 (v16i32 VR512:$idx), - (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))), - (VPERMT2Drr VR512:$src1, VR512:$idx, VR512:$src2)>; - -def : Pat<(v8f64 (int_x86_avx512_mask_vpermt_pd_512 (v8i64 VR512:$idx), - (v8f64 VR512:$src1), (v8f64 VR512:$src2), (i8 -1))), - (VPERMT2PDrr VR512:$src1, VR512:$idx, VR512:$src2)>; - -def : Pat<(v8i64 (int_x86_avx512_mask_vpermt_q_512 (v8i64 VR512:$idx), - (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))), - (VPERMT2Qrr VR512:$src1, VR512:$idx, VR512:$src2)>; +defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, + i512mem, X86VPermiv3, v16i32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, + i512mem, X86VPermiv3, v8i64, VK8WM>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, + i512mem, X86VPermiv3, v16f32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, + i512mem, X86VPermiv3, v8f64, VK8WM>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC, + PatFrag mem_frag, X86MemOperand x86memop, + SDNode OpNode, ValueType OpVT, RegisterClass KRC, + ValueType MaskVT, RegisterClass MRC> : + avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode, + OpVT, KRC> { + def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512") + VR512:$idx, VR512:$src1, VR512:$src2, -1)), + (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>; + + def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512") + VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)), + (!cast<Instruction>(NAME#rrk) VR512:$src1, + (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>; +} + +defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem, + X86VPermv3, v16i32, VK16WM, v16i1, GR16>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem, + X86VPermv3, v8i64, VK8WM, v8i1, GR8>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem, + X86VPermv3, v16f32, VK16WM, v16i1, GR16>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem, + X86VPermv3, v8f64, VK8WM, v8i1, GR8>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask // @@ -790,52 +873,61 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; -multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC, +multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC, RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, - SDNode OpNode, ValueType vt, Operand CC, string asm, - string asm_alt> { + SDNode OpNode, ValueType vt, Operand CC, string Suffix> { def rri : AVX512AIi8<opc, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, + (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), + !strconcat("vpcmp${cc}", Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RR>, EVEX_4V; def rmi : AVX512AIi8<opc, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, + (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), + !strconcat("vpcmp${cc}", Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { def rri_alt : AVX512AIi8<opc, MRMSrcReg, (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V; + !strconcat("vpcmp", Suffix, + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), + [], IIC_SSE_ALU_F32P_RR>, EVEX_4V; + def rrik_alt : AVX512AIi8<opc, MRMSrcReg, + (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc), + !strconcat("vpcmp", Suffix, + "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"), + [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K; def rmi_alt : AVX512AIi8<opc, MRMSrcMem, (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + !strconcat("vpcmp", Suffix, + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), + [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + def rmik_alt : AVX512AIi8<opc, MRMSrcMem, + (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc), + !strconcat("vpcmp", Suffix, + "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"), + [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K; } } -defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32, - X86cmpm, v16i32, AVXCC, - "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32, - X86cmpmu, v16i32, AVXCC, - "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64, - X86cmpm, v8i64, AVXCC, - "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; -defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64, - X86cmpmu, v8i64, AVXCC, - "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - -// avx512_cmp_packed - sse 1 & 2 compare packed instructions +defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem, memopv16i32, + X86cmpm, v16i32, AVXCC, "d">, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem, memopv16i32, + X86cmpmu, v16i32, AVXCC, "ud">, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem, memopv8i64, + X86cmpm, v8i64, AVXCC, "q">, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem, memopv8i64, + X86cmpmu, v8i64, AVXCC, "uq">, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +// avx512_cmp_packed - compare packed instructions multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC, X86MemOperand x86memop, ValueType vt, string suffix, Domain d> { @@ -859,11 +951,11 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC, // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { def rri_alt : AVX512PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), + (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), !strconcat("vcmp", suffix, " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), + (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), !strconcat("vcmp", suffix, " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; } @@ -1788,6 +1880,46 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))), (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>; //===----------------------------------------------------------------------===// +// AVX-512 - Non-temporals +//===----------------------------------------------------------------------===// + +def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst), + (ins i512mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_movntdqa addr:$src))]>, + EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; + +// Prefer non-temporal over temporal versions +let AddedComplexity = 400, SchedRW = [WriteStore] in { + +def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs), + (ins f512mem:$dst, VR512:$src), + "vmovntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v16f32 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + +def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs), + (ins f512mem:$dst, VR512:$src), + "vmovntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f64 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + + +def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs), + (ins i512mem:$dst, VR512:$src), + "vmovntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8i64 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; +} + +//===----------------------------------------------------------------------===// // AVX-512 - Integer arithmetic // multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -3161,6 +3293,10 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; +def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), + (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr + (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; + def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; @@ -4343,6 +4479,37 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, (VPCONFLICTQrrk VR512:$src1, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; +let Predicates = [HasCDI] in { +defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM, + i512mem, i32mem, "{1to16}">, + EVEX_V512, EVEX_CD8<32, CD8VF>; + + +defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM, + i512mem, i64mem, "{1to8}">, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +} + +def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1, + GR16:$mask), + (VPLZCNTDrrk VR512:$src1, + (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; + +def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1, + GR8:$mask), + (VPLZCNTQrrk VR512:$src1, + (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; + +def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))), + (VPLZCNTDrm addr:$src)>; +def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))), + (VPLZCNTDrr VR512:$src)>; +def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))), + (VPLZCNTQrm addr:$src)>; +def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))), + (VPLZCNTQrr VR512:$src)>; + def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; |