diff options
author | Jiangning Liu <jiangning.liu@arm.com> | 2013-09-09 02:20:27 +0000 |
---|---|---|
committer | Jiangning Liu <jiangning.liu@arm.com> | 2013-09-09 02:20:27 +0000 |
commit | 959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd (patch) | |
tree | 746a5fdf146ee7373e073b6c7b163147e7a693d4 /lib | |
parent | 56736c18c14e89a386dae969e33a31ce685a0a1c (diff) | |
download | external_llvm-959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd.zip external_llvm-959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd.tar.gz external_llvm-959cd8f49bb85c8dfe971eb5a8a648ff41ca8ebd.tar.bz2 |
Implement aarch64 neon instruction set AdvSIMD (3V Diff), covering the following 26 instructions,
SADDL, UADDL, SADDW, UADDW, SSUBL, USUBL, SSUBW, USUBW, ADDHN, RADDHN, SABAL, UABAL, SUBHN, RSUBHN, SABDL, UABDL, SMLAL, UMLAL, SMLSL, UMLSL, SQDMLAL, SQDMLSL, SMULL, UMULL, SQDMULL, PMULL
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190288 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/AArch64/AArch64InstrFormats.td | 20 | ||||
-rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 677 |
2 files changed, 696 insertions, 1 deletions
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 020ee6c..dd35367 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -990,6 +990,26 @@ class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode, // Inherit Rd in 4-0 } +// Format AdvSIMD 3 vector registers with different vector type +class NeonI_3VDiff<bit q, bit u, bits<2> size, bits<4> opcode, + dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> +{ + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = opcode; + let Inst{11} = 0b0; + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + // Format AdvSIMD 1 vector register with modified immediate class NeonI_1VModImm<bit q, bit op, dag outs, dag ins, string asmstr, diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 9712a5a..b8840aa 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -47,7 +47,7 @@ def Neon_dupImm : SDNode<"AArch64ISD::NEON_DUPIMM", SDTypeProfile<1, 1, def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; -def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; +def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; //===----------------------------------------------------------------------===// @@ -2143,6 +2143,681 @@ defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs", defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu", int_arm_neon_vcvtfp2fxu>; +multiclass Neon_sshll2_0<SDNode ext> +{ + def _v8i8 : PatFrag<(ops node:$Rn), + (v8i16 (ext (v8i8 (Neon_top16B node:$Rn))))>; + def _v4i16 : PatFrag<(ops node:$Rn), + (v4i32 (ext (v4i16 (Neon_top8H node:$Rn))))>; + def _v2i32 : PatFrag<(ops node:$Rn), + (v2i64 (ext (v2i32 (Neon_top4S node:$Rn))))>; +} + +defm NI_sext_high : Neon_sshll2_0<sext>; +defm NI_zext_high : Neon_sshll2_0<zext>; + +// The followings are for instruction class (3V Diff) + +// normal long/long2 pattern +class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator ext, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (ResTy VPR128:$Rd), + (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))), + (ResTy (ext (OpTy OpVPR:$Rm))))))], + NoItinerary>; + +multiclass NeonI_3VDL_s<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, sext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, sext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, sext, VPR64, v2i64, v2i32>; + } +} + +multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; + } +} + +multiclass NeonI_3VDL_u<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, zext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, zext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, zext, VPR64, v2i64, v2i32>; + } +} + +multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; + } +} + +defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>; +defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>; + +defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>; +defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>; + +defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>; +defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>; + +defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>; +defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>; + +// normal wide/wide2 pattern +class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator ext, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS, + [(set (ResTy VPR128:$Rd), + (ResTy (opnode (ResTy VPR128:$Rn), + (ResTy (ext (OpTy OpVPR:$Rm))))))], + NoItinerary>; + +multiclass NeonI_3VDW_s<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, sext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, sext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, sext, VPR64, v2i64, v2i32>; +} + +defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>; +defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>; + +multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; +} + +defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>; +defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>; + +multiclass NeonI_3VDW_u<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, zext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, zext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, zext, VPR64, v2i64, v2i32>; +} + +defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>; +defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>; + +multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; +} + +defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>; +defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>; + +// Get the high half part of the vector element. +multiclass NeonI_get_high +{ + def _8h : PatFrag<(ops node:$Rn), + (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn), + (v8i16 (Neon_dupImm 8))))))>; + def _4s : PatFrag<(ops node:$Rn), + (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn), + (v4i32 (Neon_dupImm 16))))))>; + def _2d : PatFrag<(ops node:$Rn), + (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn), + (v2i64 (Neon_dupImm 32))))))>; +} + +defm NI_get_hi : NeonI_get_high; + +// pattern for addhn/subhn with 2 operands +class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator get_hi, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (ResTy VPR64:$Rd), + (ResTy (get_hi + (OpTy (opnode (OpTy VPR128:$Rn), + (OpTy VPR128:$Rm))))))], + NoItinerary>; + +multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", + opnode, NI_get_hi_8h, v8i8, v8i16>; + def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", + opnode, NI_get_hi_4s, v4i16, v4i32>; + def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", + opnode, NI_get_hi_2d, v2i32, v2i64>; + } +} + +defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>; +defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; + +// pattern for operation with 2 operands +class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + RegisterClass ResVPR, RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (ResTy ResVPR:$Rd), + (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))], + NoItinerary>; + +// normal narrow pattern +multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", + opnode, VPR64, VPR128, v8i8, v8i16>; + def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", + opnode, VPR64, VPR128, v4i16, v4i32>; + def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", + opnode, VPR64, VPR128, v2i32, v2i64>; + } +} + +defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; +defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; + +// pattern for acle intrinsic with 3 operands +class NeonI_3VDN_addhn2_3Op<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator get_hi, + ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins VPR64:$src, VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (v2i64 VPR128:$Rd), + (Neon_combine + (v1i64 VPR64:$src), + (v1i64 (bitconvert + (OpSTy (get_hi + (OpTy (opnode (OpTy VPR128:$Rn), + (OpTy VPR128:$Rm)))))))))], + NoItinerary> { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDN_addhn2_3Op_v1<bit u, bits<4> opcode, + string asmop, + SDPatternOperator opnode> +{ + def _16b8h : NeonI_3VDN_addhn2_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", + opnode, NI_get_hi_8h, v8i16, v8i8>; + def _8h4s : NeonI_3VDN_addhn2_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s", + opnode, NI_get_hi_4s, v4i32, v4i16>; + def _4s2d : NeonI_3VDN_addhn2_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d", + opnode, NI_get_hi_2d, v2i64, v2i32>; +} + +defm ADDHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0100, "addhn2", add>; +defm SUBHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0110, "subhn2", sub>; + +// pattern for acle intrinsic with 3 operands +class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins VPR64:$src, VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (v2i64 VPR128:$Rd), + (Neon_combine (v1i64 VPR64:$src), + (v1i64 (bitconvert + (OpSTy (opnode (OpTy VPR128:$Rn), + (OpTy VPR128:$Rm)))))))], + NoItinerary> { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, + string asmop, + SDPatternOperator opnode> +{ + def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", + opnode, v8i16, v8i8>; + def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s", + opnode, v4i32, v4i16>; + def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d", + opnode, v2i64, v2i32>; +} + +defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2", + int_arm_neon_vraddhn>; +defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2", + int_arm_neon_vrsubhn>; + +// pattern that need to extend result +class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (ResTy VPR128:$Rd), + (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn), + (OpTy OpVPR:$Rm))))))], + NoItinerary>; + +multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, VPR64, v8i16, v8i8, v8i8>; + def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR64, v4i32, v4i16, v4i16>; + def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR64, v2i64, v2i32, v2i32>; + } +} + +defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>; +defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; + +multiclass NeonI_Op_High<SDPatternOperator op> +{ + def _16B : PatFrag<(ops node:$Rn, node:$Rm), + (op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>; + def _8H : PatFrag<(ops node:$Rn, node:$Rm), + (op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>; + def _4S : PatFrag<(ops node:$Rn, node:$Rm), + (op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>; +} + +defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>; +defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>; +defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>; +defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>; +defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>; +defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>; + +multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, + string asmop, string opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b", + !cast<PatFrag>(opnode # "_16B"), + VPR128, v8i16, v16i8, v8i8>; + def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast<PatFrag>(opnode # "_8H"), + VPR128, v4i32, v8i16, v4i16>; + def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast<PatFrag>(opnode # "_4S"), + VPR128, v2i64, v4i32, v2i32>; + } +} + +defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>; +defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>; + +// For pattern that need two operators being chained. +class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator subop, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (ResTy VPR128:$Rd), + (ResTy (opnode + (ResTy VPR128:$src), + (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn), + (OpTy OpVPR:$Rm))))))))], + NoItinerary> { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + SDPatternOperator subop> +{ + def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, subop, VPR64, v8i16, v8i8, v8i8>; + def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, subop, VPR64, v4i32, v4i16, v4i16>; + def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, subop, VPR64, v2i64, v2i32, v2i32>; +} + +defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal", + add, int_arm_neon_vabds>; +defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal", + add, int_arm_neon_vabdu>; + +multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + string subop> +{ + def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, !cast<PatFrag>(subop # "_16B"), + VPR128, v8i16, v16i8, v8i8>; + def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, !cast<PatFrag>(subop # "_8H"), + VPR128, v4i32, v8i16, v4i16>; + def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, !cast<PatFrag>(subop # "_4S"), + VPR128, v2i64, v4i32, v2i32>; +} + +defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add, + "NI_sabdl_hi">; +defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add, + "NI_uabdl_hi">; + +// Long pattern with 2 operands +multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, VPR128, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR128, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR128, VPR64, v2i64, v2i32>; + } +} + +defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>; +defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>; + +class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (ResTy VPR128:$Rd), + (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))], + NoItinerary>; + + +multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, + string asmop, + string opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", + !cast<PatFrag>(opnode # "_16B"), + v8i16, v16i8>; + def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast<PatFrag>(opnode # "_8H"), + v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast<PatFrag>(opnode # "_4S"), + v2i64, v4i32>; + } +} + +defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2", + "NI_smull_hi", 1>; +defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2", + "NI_umull_hi", 1>; + +// Long pattern with 3 operands +class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (ResTy VPR128:$Rd), + (ResTy (opnode + (ResTy VPR128:$src), + (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))], + NoItinerary> { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode> +{ + def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, v8i16, v8i8>; + def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, v4i32, v4i16>; + def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, v2i64, v2i32>; +} + +def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn), + (add node:$Rd, + (int_arm_neon_vmulls node:$Rn, node:$Rm))>; + +def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn), + (add node:$Rd, + (int_arm_neon_vmullu node:$Rn, node:$Rm))>; + +def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn), + (sub node:$Rd, + (int_arm_neon_vmulls node:$Rn, node:$Rm))>; + +def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rm, node:$Rn), + (sub node:$Rd, + (int_arm_neon_vmullu node:$Rn, node:$Rm))>; + +defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>; +defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>; + +defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>; +defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>; + +class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator subop, SDPatternOperator opnode, + RegisterClass OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff<q, u, size, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm), + asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, + [(set (ResTy VPR128:$Rd), + (ResTy (subop + (ResTy VPR128:$src), + (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))], + NoItinerary> { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, + string asmop, + SDPatternOperator subop, + string opnode> +{ + def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b", + subop, !cast<PatFrag>(opnode # "_16B"), + VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", + subop, !cast<PatFrag>(opnode # "_8H"), + VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", + subop, !cast<PatFrag>(opnode # "_4S"), + VPR128, v2i64, v4i32>; +} + +defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2", + add, "NI_smull_hi">; +defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2", + add, "NI_umull_hi">; + +defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2", + sub, "NI_smull_hi">; +defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2", + sub, "NI_umull_hi">; + +multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode> +{ + def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, int_arm_neon_vqdmull, + VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, int_arm_neon_vqdmull, + VPR64, v2i64, v2i32>; +} + +defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal", + int_arm_neon_vqadds>; +defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl", + int_arm_neon_vqsubs>; + +multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR128, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR128, VPR64, v2i64, v2i32>; + } +} + +defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", + int_arm_neon_vqdmull, 1>; + +multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, + string asmop, + string opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast<PatFrag>(opnode # "_8H"), + v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast<PatFrag>(opnode # "_4S"), + v2i64, v4i32>; + } +} + +defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", + "NI_qdmull_hi", 1>; + +multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, + string asmop, + SDPatternOperator opnode> +{ + def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_qdmull_hi_8H, + VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_qdmull_hi_4S, + VPR128, v2i64, v4i32>; +} + +defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2", + int_arm_neon_vqadds>; +defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2", + int_arm_neon_vqsubs>; + +multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, VPR128, VPR64, v8i16, v8i8>; + } +} + +defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>; + +multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, + string asmop, + string opnode, + bit Commutable = 0> +{ + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", + !cast<PatFrag>(opnode # "_16B"), + v8i16, v16i8>; + } +} + +defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", + "NI_pmull_hi", 1>; + +// End of implementation for instruction class (3V Diff) + // Scalar Arithmetic class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop> |