diff options
author | David Goodwin <david_goodwin@apple.com> | 2009-09-25 18:38:29 +0000 |
---|---|---|
committer | David Goodwin <david_goodwin@apple.com> | 2009-09-25 18:38:29 +0000 |
commit | 658ea6099724d0aaf5297a02b185f8351fcab389 (patch) | |
tree | f77275b125e4310420463e69803746a0ae317323 | |
parent | 1d73742ad9c20a9eb1f4538265477a15b094fee7 (diff) | |
download | external_llvm-658ea6099724d0aaf5297a02b185f8351fcab389.zip external_llvm-658ea6099724d0aaf5297a02b185f8351fcab389.tar.gz external_llvm-658ea6099724d0aaf5297a02b185f8351fcab389.tar.bz2 |
Finish scheduling itineraries for NEON.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82788 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 747 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrVFP.td | 8 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSchedule.td | 34 | ||||
-rw-r--r-- | lib/Target/ARM/ARMScheduleV7.td | 146 |
4 files changed, 553 insertions, 382 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 702ce9e..5e8ab9e 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -481,11 +481,10 @@ class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } class N3VDSL<bits<2> op21_20, bits<4> op11_8, - string OpcodeStr, ValueType Ty, SDNode ShOp> + InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -496,7 +495,7 @@ class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, + IIC_VMULi16D, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), @@ -515,11 +514,11 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } class N3VQSL<bits<2> op21_20, bits<4> op11_8, - string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp> + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -530,7 +529,7 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs 
QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, + IIC_VMULi16Q, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), @@ -544,7 +543,7 @@ class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), NoItinerary, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", []> { let isCommutable = Commutable; } @@ -557,32 +556,30 @@ class N3VDsPat<SDNode OpNode, NeonI Inst> // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), NoItinerary, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { let isCommutable = Commutable; } -class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, +class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]> { let isCommutable = 0; } -class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, +class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass 
itin, string OpcodeStr, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), @@ -591,32 +588,30 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, } class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), NoItinerary, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { let isCommutable = Commutable; } -class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, +class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), imm:$lane)))))]> { let isCommutable = 0; } -class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, +class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, 
$src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -626,30 +621,29 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, // Multiply-Add/Sub operations, both double- and quad-register. class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, + ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), NoItinerary, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", [(set DPR:$dst, (Ty (OpNode DPR:$src1, (Ty (MulOp DPR:$src2, DPR:$src3)))))]>; -class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, +class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), - NoItinerary, + (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$src2, (Ty (NEONvduplane (Ty DPR_VFP2:$src3), imm:$lane)))))))]>; -class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, +class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), - NoItinerary, + (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), @@ -658,32 +652,31 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, imm:$lane)))))))]>; class N3VQMulOp<bit 
op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, ValueType Ty, + SDNode MulOp, SDNode OpNode> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), NoItinerary, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", [(set QPR:$dst, (Ty (OpNode QPR:$src1, (Ty (MulOp QPR:$src2, QPR:$src3)))))]>; -class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, +class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), - NoItinerary, + (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), imm:$lane)))))))]>; -class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, +class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), - NoItinerary, + (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), @@ -693,10 +686,11 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, // Multiply-Add/Sub operations, scalar single-precision class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, + ValueType 
Ty, SDNode MulOp, SDNode OpNode> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), NoItinerary, + (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", []>; class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst> @@ -710,18 +704,18 @@ class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst> // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), NoItinerary, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2), (OpTy DPR:$src3))))]>; class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), NoItinerary, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2), (OpTy QPR:$src3))))]>; @@ -729,31 +723,30 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, // Neon Long 3-argument intrinsic. The destination register is // a quad-register and is also used as the first source operand register. 
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp> + InstrItinClass itin, string OpcodeStr, + ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), NoItinerary, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst", [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>; -class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, +class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), - NoItinerary, + (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (OpTy DPR:$src2), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3), imm:$lane)))))]>; -class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, +class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), - (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), - NoItinerary, + (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), @@ -767,7 +760,7 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, ValueType TyD, ValueType TyQ, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), NoItinerary, 
+ (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINi4D, !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> { let isCommutable = Commutable; @@ -775,31 +768,29 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, // Long 3-register intrinsics. class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType TyQ, ValueType TyD, + InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), NoItinerary, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> { let isCommutable = Commutable; } -class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, +class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (IntOp (OpTy DPR:$src1), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2), imm:$lane)))))]>; -class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, +class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", [(set (ResTy QPR:$dst), (ResTy (IntOp (OpTy DPR:$src1), (OpTy (NEONvduplane (OpTy 
DPR_8:$src2), @@ -810,7 +801,7 @@ class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), NoItinerary, + (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), IIC_VSUBiD, !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "", [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> { let isCommutable = Commutable; @@ -821,13 +812,13 @@ class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), - (ins DPR:$src), NoItinerary, !strconcat(OpcodeStr, "\t$dst, $src"), "", + (ins DPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>; class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), - (ins QPR:$src), NoItinerary, !strconcat(OpcodeStr, "\t$dst, $src"), "", + (ins QPR:$src), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; // Pairwise long 2-register accumulate intrinsics, @@ -837,29 +828,31 @@ class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), NoItinerary, + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD, !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst", [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>; class 
N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), NoItinerary, + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ, !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst", [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>; // Shift by immediate, // both double- and quad-register. class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode> + bit op4, InstrItinClass itin, string OpcodeStr, + ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NoItinerary, + (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>; class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode> + bit op4, InstrItinClass itin, string OpcodeStr, + ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NoItinerary, + (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>; @@ -868,17 +861,17 @@ class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, - (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), NoItinerary, + (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src), (i32 
imm:$SIMM))))]>; // Narrow shift by immediate. class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, ValueType ResTy, - ValueType OpTy, SDNode OpNode> + bit op6, bit op4, InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, - (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), NoItinerary, + (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src), (i32 imm:$SIMM))))]>; @@ -889,7 +882,7 @@ class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), - NoItinerary, + IIC_VPALiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set DPR:$dst, (Ty (add DPR:$src1, (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>; @@ -897,7 +890,7 @@ class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), - NoItinerary, + IIC_VPALiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set QPR:$dst, (Ty (add QPR:$src1, (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>; @@ -908,14 +901,14 @@ class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), - NoItinerary, + IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>; class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op4, string 
OpcodeStr, ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), - NoItinerary, + IIC_VSHLiQ, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>; @@ -925,14 +918,14 @@ class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NoItinerary, + (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>; class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NoItinerary, + (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>; @@ -966,9 +959,9 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, !strconcat(OpcodeStr, "32"), v2i32, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, !strconcat(OpcodeStr, "32"), v2i32, ShOp>; def v8i16 : N3VQSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>; } // ....then also with element size 64 bits: @@ -1016,45 +1009,56 @@ multiclass N2VLInt_QHS<bit op24, bit 
op23, bits<4> op11_8, bit op7, bit op6, // First with only element sizes of 16 and 32 bits: multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { // 64-bit vector types. - def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), + def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16, !strconcat(OpcodeStr,"16"), v4i16, v4i16, IntOp, Commutable>; - def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), + def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32, !strconcat(OpcodeStr,"32"), v2i32, v2i32, IntOp, Commutable>; // 128-bit vector types. - def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), + def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16, !strconcat(OpcodeStr,"16"), v8i16, v8i16, IntOp, Commutable>; - def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), + def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32, !strconcat(OpcodeStr,"32"), v4i32, v4i32, IntOp, Commutable>; } -multiclass N3VIntSL_HS<bits<4> op11_8, string OpcodeStr, Intrinsic IntOp> { - def v4i16 : N3VDIntSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, IntOp>; - def v2i32 : N3VDIntSL<0b10, op11_8, !strconcat(OpcodeStr, "32"), v2i32, IntOp>; - def v8i16 : N3VQIntSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>; - def v4i32 : N3VQIntSL<0b10, op11_8, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>; +multiclass N3VIntSL_HS<bits<4> op11_8, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, Intrinsic IntOp> { + def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, IntOp>; + def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, IntOp>; + def v8i16 : 
N3VQIntSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>; + def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> - : N3VInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { - def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), - v8i8, v8i8, IntOp, Commutable>; - def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), - v16i8, v16i8, IntOp, Commutable>; + : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, + OpcodeStr, IntOp, Commutable> { + def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16, + !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp, Commutable>; + def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16, + !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp, Commutable>; } // ....then also with element size of 64 bits: multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> - : N3VInt_QHS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { - def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"), - v1i64, v1i64, IntOp, Commutable>; - def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr,"64"), - v2i64, v2i64, IntOp, Commutable>; + : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, + OpcodeStr, IntOp, Commutable> { + def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32, + !strconcat(OpcodeStr,"64"), v1i64, v1i64, IntOp, Commutable>; + def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32, + 
!strconcat(OpcodeStr,"64"), v2i64, v2i64, IntOp, Commutable>; } @@ -1075,27 +1079,29 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"), - v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"), - v2i64, v2i32, IntOp, Commutable>; + InstrItinClass itin, string OpcodeStr, + Intrinsic IntOp, bit Commutable = 0> { + def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin, + !strconcat(OpcodeStr,"16"), v4i32, v4i16, IntOp, Commutable>; + def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin, + !strconcat(OpcodeStr,"32"), v2i64, v2i32, IntOp, Commutable>; } multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, - string OpcodeStr, Intrinsic IntOp> { - def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, + InstrItinClass itin, string OpcodeStr, Intrinsic IntOp> { + def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin, !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; - def v2i32 : N3VLIntSL<op24, 0b10, op11_8, + def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin, !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> { - def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"), - v8i16, v8i8, IntOp, Commutable>; + InstrItinClass itin, string OpcodeStr, + Intrinsic IntOp, bit Commutable = 0> + : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, IntOp, Commutable> { + def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin, + !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp, Commutable>; } @@ -1115,32 
+1121,37 @@ multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, // Neon Multiply-Op vector operations, // element sizes of 8, 16 and 32 bits: multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, SDNode OpNode> { // 64-bit vector types. - def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, + def v8i8 : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16, !strconcat(OpcodeStr, "8"), v8i8, mul, OpNode>; - def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, + def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16, !strconcat(OpcodeStr, "16"), v4i16, mul, OpNode>; - def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, + def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32, !strconcat(OpcodeStr, "32"), v2i32, mul, OpNode>; // 128-bit vector types. - def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, + def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16, !strconcat(OpcodeStr, "8"), v16i8, mul, OpNode>; - def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, + def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, mul, OpNode>; - def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, + def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, mul, OpNode>; } -multiclass N3VMulOpSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { - def v4i16 : N3VDMulOpSL16<0b01, op11_8, +multiclass N3VMulOpSL_HS<bits<4> op11_8, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, SDNode ShOp> { + def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>; - def v2i32 : N3VDMulOpSL<0b10, op11_8, + def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>; - def v8i16 : N3VQMulOpSL16<0b01, op11_8, + def 
v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>; - def v4i32 : N3VQMulOpSL<0b10, op11_8, + def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>; } @@ -1149,19 +1160,19 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, Intrinsic IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, !strconcat(OpcodeStr, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, !strconcat(OpcodeStr, "16"), v4i16, v4i16, IntOp>; - def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D, !strconcat(OpcodeStr, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. 
- def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, + def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q, !strconcat(OpcodeStr, "8"), v16i8, v16i8, IntOp>; - def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q, !strconcat(OpcodeStr, "16"), v8i16, v8i16, IntOp>; - def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q, !strconcat(OpcodeStr, "32"), v4i32, v4i32, IntOp>; } @@ -1171,17 +1182,17 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, Intrinsic IntOp> { - def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, + def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, + def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D, !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; } multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, string OpcodeStr, Intrinsic IntOp> { - def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, + def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D, !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; - def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, + def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D, !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; } @@ -1189,7 +1200,7 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, Intrinsic IntOp> : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> { - def v8i16 : N3VLInt3<op24, op23, 0b01, op11_8, op4, + def v8i16 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; } @@ -1267,25 +1278,25 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, 
bits<2> op21_20, bits<2> op17_16, // Neon 2-register vector shift by immediate, // element sizes of 8, 16, 32 and 64 bits: multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, SDNode OpNode> { + InstrItinClass itin, string OpcodeStr, SDNode OpNode> { // 64-bit vector types. - def v8i8 : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, + def v8i8 : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, itin, !strconcat(OpcodeStr, "8"), v8i8, OpNode>; - def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, + def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, itin, !strconcat(OpcodeStr, "16"), v4i16, OpNode>; - def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, + def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, itin, !strconcat(OpcodeStr, "32"), v2i32, OpNode>; - def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, + def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, itin, !strconcat(OpcodeStr, "64"), v1i64, OpNode>; // 128-bit vector types. 
- def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, + def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, itin, !strconcat(OpcodeStr, "8"), v16i8, OpNode>; - def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, + def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, itin, !strconcat(OpcodeStr, "16"), v8i16, OpNode>; - def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, + def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, itin, !strconcat(OpcodeStr, "32"), v4i32, OpNode>; - def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, + def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, itin, !strconcat(OpcodeStr, "64"), v2i64, OpNode>; } @@ -1352,20 +1363,26 @@ defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>; def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, "vaddl.s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, "vaddl.u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>; // VHADD : Vector Halving Add -defm VHADDs : N3VInt_QHS<0,0,0b0000,0, "vhadd.s", int_arm_neon_vhadds, 1>; -defm VHADDu : N3VInt_QHS<1,0,0b0000,0, "vhadd.u", int_arm_neon_vhaddu, 1>; +defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>; +defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>; // VRHADD : Vector Rounding Halving Add 
-defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, "vrhadd.s", int_arm_neon_vrhadds, 1>; -defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, "vrhadd.u", int_arm_neon_vrhaddu, 1>; +defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>; +defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>; // VQADD : Vector Saturating Add -defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, "vqadd.s", int_arm_neon_vqadds, 1>; -defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, "vqadd.u", int_arm_neon_vqaddu, 1>; +defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>; +defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) @@ -1376,15 +1393,15 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>; // VMUL : Vector Multiply (integer, polynomial and floating-point) defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, "vmul.i", mul, 1>; -def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v8i8, v8i8, +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; -def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, "vmul.p8", v16i8, v16i8, +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>; defm VMULsl : N3VSL_HS<0b1000, "vmul.i", mul>; -def VMULslfd : N3VDSL<0b10, 0b1001, "vmul.f32", 
v2f32, fmul>; -def VMULslfq : N3VQSL<0b10, 0b1001, "vmul.f32", v4f32, v2f32, fmul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>; def : Pat<(v8i16 (mul (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), @@ -1405,8 +1422,12 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VQDMULH : Vector Saturating Doubling Multiply Returning High Half -defm VQDMULH : N3VInt_HS<0,0,0b1011,0, "vqdmulh.s", int_arm_neon_vqdmulh, 1>; -defm VQDMULHsl: N3VIntSL_HS<0b1100, "vqdmulh.s", int_arm_neon_vqdmulh>; +defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh.s", int_arm_neon_vqdmulh, 1>; +defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh.s", int_arm_neon_vqdmulh>; def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), @@ -1421,8 +1442,12 @@ def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half -defm VQRDMULH : N3VInt_HS<1,0,0b1011,0, "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; -defm VQRDMULHsl : N3VIntSL_HS<0b1101, "vqrdmulh.s", int_arm_neon_vqrdmulh>; +defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; +defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqrdmulh.s", int_arm_neon_vqrdmulh>; def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), @@ -1437,26 +1462,28 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), 
(SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, "vmull.s", int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, "vmull.u", int_arm_neon_vmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8", v8i16, v8i8, +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>; +def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; -defm VMULLsls : N3VLIntSL_HS<0, 0b1010, "vmull.s", int_arm_neon_vmulls>; -defm VMULLslu : N3VLIntSL_HS<1, 0b1010, "vmull.u", int_arm_neon_vmullu>; +defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>; +defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, "vqdmull.s", int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, "vqdmull.s", int_arm_neon_vqdmull>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. 
// VMLA : Vector Multiply Accumulate (integer and floating-point) -defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmla.i", add>; -def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32, fmul, fadd>; -def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v4f32, fmul, fadd>; -defm VMLAsl : N3VMulOpSL_HS<0b0000, "vmla.i", add>; -def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, "vmla.f32", v2f32, fmul, fadd>; -def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, "vmla.f32", v4f32, v2f32, fmul, fadd>; +defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; +def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>; +def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>; +defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; +def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>; +def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>; def : Pat<(v8i16 (add (v8i16 QPR:$src1), (mul (v8i16 QPR:$src2), @@ -1497,12 +1524,14 @@ defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>; defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) -defm VMLS : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmls.i", sub>; -def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32, fmul, fsub>; -def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v4f32, fmul, fsub>; -defm VMLSsl : N3VMulOpSL_HS<0b0100, "vmls.i", sub>; -def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, "vmls.f32", v2f32, fmul, fsub>; -def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, "vmls.f32", v4f32, v2f32, fmul, fsub>; +defm VMLS : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; +def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", 
v2f32, fmul, fsub>; +def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>; +defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; +def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>; +def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>; def : Pat<(v8i16 (sub (v8i16 QPR:$src1), (mul (v8i16 QPR:$src2), @@ -1549,17 +1578,21 @@ defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>; def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, "vsubl.s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, "vsubl.u", int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract -defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, "vhsub.s", int_arm_neon_vhsubs, 0>; -defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, "vhsub.u", int_arm_neon_vhsubu, 0>; +defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>; +defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract -defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, "vqsub.s", int_arm_neon_vqsubs, 0>; -defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, "vqsub.u", int_arm_neon_vqsubu, 0>; +defm VQSUBs : N3VInt_QHSD<0, 0, 
0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>; +defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) @@ -1587,14 +1620,14 @@ defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) -def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v2i32, v2f32, +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", v2i32, v2f32, int_arm_neon_vacged, 0>; -def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, "vacge.f32", v4i32, v4f32, +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) -def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v2i32, v2f32, +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; -def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32", v4i32, v4f32, +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; // VTST : Vector Test Bits defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, @@ -1621,7 +1654,7 @@ def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), [(set DPR:$dst, (v2i32 (and DPR:$src1, (vnot_conv DPR:$src2))))]>; def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), NoItinerary, + (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, "vbic\t$dst, 
$src1, $src2", "", [(set QPR:$dst, (v4i32 (and QPR:$src1, (vnot_conv QPR:$src2))))]>; @@ -1633,18 +1666,18 @@ def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), [(set DPR:$dst, (v2i32 (or DPR:$src1, (vnot_conv DPR:$src2))))]>; def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2), NoItinerary, + (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, "vorn\t$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (or QPR:$src1, (vnot_conv QPR:$src2))))]>; // VMVN : Vector Bitwise NOT def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), NoItinerary, + (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, "vmvn\t$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>; def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, + (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, "vmvn\t$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>; def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>; @@ -1652,13 +1685,13 @@ def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), - (ins DPR:$src1, DPR:$src2, DPR:$src3), NoItinerary, + (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD, "vbsl\t$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (v2i32 (or (and DPR:$src2, DPR:$src1), (and DPR:$src3, (vnot_conv DPR:$src1)))))]>; def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), - (ins QPR:$src1, QPR:$src2, QPR:$src3), NoItinerary, + (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ, "vbsl\t$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (v4i32 (or (and QPR:$src2, QPR:$src1), @@ -1675,16 +1708,18 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), // Vector Absolute Differences. 
// VABD : Vector Absolute Difference -defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>; -defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>; -def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32, +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>; +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>; +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32, int_arm_neon_vabds, 0>; -def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32, +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, "vabdl.u", int_arm_neon_vabdlu, 0>; +defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>; +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate defm VABAs : N3VInt3_QHS<0,1,0b0101,0, "vaba.s", int_arm_neon_vabas>; @@ -1697,31 +1732,35 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. 
// VMAX : Vector Maximum -defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s", int_arm_neon_vmaxs, 1>; -defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v2f32, v2f32, +defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>; +defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32", v4f32, v4f32, +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum -defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s", int_arm_neon_vmins, 1>; -defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v2f32, v2f32, +defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>; +defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32, int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32, +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. 
// VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8, +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8, int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16, +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16, int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32, +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32, +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long @@ -1737,35 +1776,35 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpadal.u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, "vpmax.s8", v8i8, v8i8, +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, "vpmax.s16", v4i16, v4i16, +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, "vpmax.s32", v2i32, v2i32, +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, "vpmax.u8", v8i8, v8i8, +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16, +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32, +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, 
"vpmax.u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32, +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8, +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, "vpmin.s16", v4i16, v4i16, +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, "vpmin.s32", v2i32, v2i32, +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, "vpmin.u8", v8i8, v8i8, +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16, +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32, +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32, +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. 
@@ -1785,9 +1824,9 @@ def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, v4f32, v4f32, int_arm_neon_vrecpe>; // VRECPS : Vector Reciprocal Step -def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32, +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32, int_arm_neon_vrecps, 1>; -def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v4f32, v4f32, +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32, int_arm_neon_vrecps, 1>; // VRSQRTE : Vector Reciprocal Square Root Estimate @@ -1805,21 +1844,23 @@ def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, v4f32, v4f32, int_arm_neon_vrsqrte>; // VRSQRTS : Vector Reciprocal Square Root Step -def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32, +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32, int_arm_neon_vrsqrts, 1>; -def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v4f32, v4f32, +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32, int_arm_neon_vrsqrts, 1>; // Vector Shifts. 
// VSHL : Vector Shift -defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, "vshl.s", int_arm_neon_vshifts, 0>; -defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, "vshl.u", int_arm_neon_vshiftu, 0>; +defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, + IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>; +defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, + IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, "vshl.i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLiD, "vshl.i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, "vshr.s", NEONvshrs>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, "vshr.u", NEONvshru>; +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; // VSHLL : Vector Shift Left Long def VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8", @@ -1844,86 +1885,90 @@ def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, "vshrn.i16", - v8i8, v8i16, NEONvshrn>; -def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, "vshrn.i32", - v4i16, v4i32, NEONvshrn>; -def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, "vshrn.i64", - v2i32, v2i64, NEONvshrn>; +def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i16", v8i8, v8i16, NEONvshrn>; +def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i32", v4i16, v4i32, NEONvshrn>; +def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, + IIC_VSHLiD, "vshrn.i64", v2i32, v2i64, NEONvshrn>; // VRSHL : Vector Rounding Shift -defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, "vrshl.s", int_arm_neon_vrshifts, 0>; -defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, "vrshl.u", int_arm_neon_vrshiftu, 0>; 
+defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>; +defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, "vrshr.s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, "vrshr.u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, "vrshrn.i16", - v8i8, v8i16, NEONvrshrn>; -def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, "vrshrn.i32", - v4i16, v4i32, NEONvrshrn>; -def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, "vrshrn.i64", - v2i32, v2i64, NEONvrshrn>; +def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i16", v8i8, v8i16, NEONvrshrn>; +def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i32", v4i16, v4i32, NEONvrshrn>; +def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vrshrn.i64", v2i32, v2i64, NEONvrshrn>; // VQSHL : Vector Saturating Shift -defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, "vqshl.s", int_arm_neon_vqshifts, 0>; -defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, "vqshl.u", int_arm_neon_vqshiftu, 0>; +defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>; +defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqshl.u", int_arm_neon_vqshiftu, 0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, "vqshl.s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, "vqshl.u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", 
NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, "vqshlu.s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.s16", - v8i8, v8i16, NEONvqshrns>; -def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.s32", - v4i16, v4i32, NEONvqshrns>; -def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.s64", - v2i32, v2i64, NEONvqshrns>; -def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.u16", - v8i8, v8i16, NEONvqshrnu>; -def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.u32", - v4i16, v4i32, NEONvqshrnu>; -def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.u64", - v2i32, v2i64, NEONvqshrnu>; +def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s16", v8i8, v8i16, NEONvqshrns>; +def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s32", v4i16, v4i32, NEONvqshrns>; +def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.s64", v2i32, v2i64, NEONvqshrns>; +def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>; +def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>; +def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, + IIC_VSHLi4D, "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, "vqshrun.s16", - v8i8, v8i16, NEONvqshrnsu>; -def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, "vqshrun.s32", - v4i16, v4i32, NEONvqshrnsu>; -def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, "vqshrun.s64", - v2i32, 
v2i64, NEONvqshrnsu>; +def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>; +def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>; +def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, + IIC_VSHLi4D, "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, "vqrshl.s", - int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, "vqrshl.u", - int_arm_neon_vqrshiftu, 0>; +defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>; +defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, + IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.s16", - v8i8, v8i16, NEONvqrshrns>; -def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.s32", - v4i16, v4i32, NEONvqrshrns>; -def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.s64", - v2i32, v2i64, NEONvqrshrns>; -def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.u16", - v8i8, v8i16, NEONvqrshrnu>; -def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.u32", - v4i16, v4i32, NEONvqrshrnu>; -def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.u64", - v2i32, v2i64, NEONvqrshrnu>; +def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>; +def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>; +def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>; +def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u16", v8i8, v8i16, 
NEONvqrshrnu>; +def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>; +def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, + IIC_VSHLi4D, "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, "vqrshrun.s16", - v8i8, v8i16, NEONvqrshrnsu>; -def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, "vqrshrun.s32", - v4i16, v4i32, NEONvqrshrnsu>; -def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, "vqrshrun.s64", - v2i32, v2i64, NEONvqrshrnsu>; +def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>; +def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>; +def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, + IIC_VSHLi4D, "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; @@ -1962,13 +2007,11 @@ def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>; class VNEGD<bits<2> size, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (Ty (vneg DPR:$src)))]>; class VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), - NoItinerary, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (Ty (vneg QPR:$src)))]>; // VNEG : Vector Negate @@ -1981,11 +2024,11 @@ def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>; // VNEG : Vector Negate (floating-point) def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR:$dst), 
(ins DPR:$src), NoItinerary, + (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, "vneg.f32\t$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, - (outs QPR:$dst), (ins QPR:$src), NoItinerary, + (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, "vneg.f32\t$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; @@ -2024,9 +2067,9 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // VMOV : Vector Move (Register) def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - NoItinerary, "vmov\t$dst, $src", "", []>; + IIC_VMOVD, "vmov\t$dst, $src", "", []>; def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), - NoItinerary, "vmov\t$dst, $src", "", []>; + IIC_VMOVD, "vmov\t$dst, $src", "", []>; // VMOV : Vector Move (Immediate) @@ -2066,38 +2109,38 @@ def vmovImm64 : PatLeaf<(build_vector), [{ // be encoded based on the immed values. def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), - (ins i8imm:$SIMM), NoItinerary, + (ins i8imm:$SIMM), IIC_VMOVImm, "vmov.i8\t$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), - (ins i8imm:$SIMM), NoItinerary, + (ins i8imm:$SIMM), IIC_VMOVImm, "vmov.i8\t$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst), - (ins i16imm:$SIMM), NoItinerary, + (ins i16imm:$SIMM), IIC_VMOVImm, "vmov.i16\t$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst), - (ins i16imm:$SIMM), NoItinerary, + (ins i16imm:$SIMM), IIC_VMOVImm, "vmov.i16\t$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst), - (ins i32imm:$SIMM), NoItinerary, + (ins i32imm:$SIMM), IIC_VMOVImm, "vmov.i32\t$dst, $SIMM", "", [(set DPR:$dst, (v2i32 
vmovImm32:$SIMM))]>; def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst), - (ins i32imm:$SIMM), NoItinerary, + (ins i32imm:$SIMM), IIC_VMOVImm, "vmov.i32\t$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), - (ins i64imm:$SIMM), NoItinerary, + (ins i64imm:$SIMM), IIC_VMOVImm, "vmov.i64\t$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), - (ins i64imm:$SIMM), NoItinerary, + (ins i64imm:$SIMM), IIC_VMOVImm, "vmov.i64\t$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; @@ -2105,27 +2148,27 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - NoItinerary, "vmov", ".s8\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>; def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - NoItinerary, "vmov", ".s16\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>; def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - NoItinerary, "vmov", ".u8\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>; def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - NoItinerary, "vmov", ".u16\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src), imm:$lane))]>; def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - NoItinerary, "vmov", ".32\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", 
".32\t$dst, $src[$lane]", [(set GPR:$dst, (extractelt (v2i32 DPR:$src), imm:$lane))]>; // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -2166,17 +2209,17 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), let Constraints = "$src1 = $dst" in { def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - NoItinerary, "vmov", ".8\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1), GPR:$src2, imm:$lane))]>; def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - NoItinerary, "vmov", ".16\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1), GPR:$src2, imm:$lane))]>; def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - NoItinerary, "vmov", ".32\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2", [(set DPR:$dst, (insertelt (v2i32 DPR:$src1), GPR:$src2, imm:$lane))]>; } @@ -2242,11 +2285,11 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)), class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty> : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src), - NoItinerary, "vdup", !strconcat(asmSize, "\t$dst, $src"), + IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"), [(set DPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>; class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty> : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src), - NoItinerary, "vdup", !strconcat(asmSize, "\t$dst, $src"), + IIC_VMOVIS, "vdup", !strconcat(asmSize, "\t$dst, $src"), [(set QPR:$dst, (Ty (NEONvdup (i32 GPR:$src))))]>; def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>; @@ -2257,11 +2300,11 @@ def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>; def 
VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src), - NoItinerary, "vdup", ".32\t$dst, $src", + IIC_VMOVIS, "vdup", ".32\t$dst, $src", [(set DPR:$dst, (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>; def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), - NoItinerary, "vdup", ".32\t$dst, $src", + IIC_VMOVIS, "vdup", ".32\t$dst, $src", [(set QPR:$dst, (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>; @@ -2269,14 +2312,14 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, - (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), NoItinerary, + (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType ResTy, ValueType OpTy> : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, - (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), NoItinerary, + (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; @@ -2308,12 +2351,12 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0, (outs DPR:$dst), (ins SPR:$src), - NoItinerary, "vdup.32\t$dst, ${src:lane}", "", + IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0, (outs QPR:$dst), (ins SPR:$src), - NoItinerary, "vdup.32\t$dst, ${src:lane}", "", + IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), @@ -2387,12 +2430,12 @@ def VCVTxu2fq : 
N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", class VREV64D<bits<2> op19_18, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst), - (ins DPR:$src), NoItinerary, + (ins DPR:$src), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>; class VREV64Q<bits<2> op19_18, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst), - (ins QPR:$src), NoItinerary, + (ins QPR:$src), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>; @@ -2410,12 +2453,12 @@ def VREV64qf : VREV64Q<0b10, "vrev64.32", v4f32>; class VREV32D<bits<2> op19_18, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst), - (ins DPR:$src), NoItinerary, + (ins DPR:$src), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>; class VREV32Q<bits<2> op19_18, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst), - (ins QPR:$src), NoItinerary, + (ins QPR:$src), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>; @@ -2429,12 +2472,12 @@ def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>; class VREV16D<bits<2> op19_18, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst), - (ins DPR:$src), NoItinerary, + (ins DPR:$src), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>; class VREV16Q<bits<2> op19_18, string OpcodeStr, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst), - (ins QPR:$src), NoItinerary, + (ins QPR:$src), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>; @@ -2447,14 +2490,14 @@ def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>; class VEXTd<string 
OpcodeStr, ValueType Ty> : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst), - (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NoItinerary, + (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), (Ty DPR:$rhs), imm:$index)))]>; class VEXTq<string OpcodeStr, ValueType Ty> : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst), - (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NoItinerary, + (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), (Ty QPR:$rhs), imm:$index)))]>; @@ -2504,24 +2547,24 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">; // VTBL : Vector Table Lookup def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$src), NoItinerary, + (ins DPR:$tbl1, DPR:$src), IIC_VTB1, "vtbl.8\t$dst, \\{$tbl1\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>; def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NoItinerary, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2, "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NoItinerary, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3, "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), - (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NoItinerary, + (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4, "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; @@ -2529,25 +2572,25 @@ def 
VTBL4 // VTBX : Vector Table Extension def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$src), NoItinerary, + (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1, "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>; def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NoItinerary, + (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2, "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), - (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NoItinerary, + (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3, "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NoItinerary, + DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4, "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; @@ -2576,11 +2619,11 @@ def : N3VDsPat<fmul, VMULfd_sfp>; // Vector Multiply-Accumulate/Subtract used for single-precision FP let neverHasSideEffects = 1 in -def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32,fmul,fadd>; +def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>; def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>; let neverHasSideEffects = 1 in -def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32,fmul,fsub>; +def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>; def : 
N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>; // Vector Absolute used for single-precision FP diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index a476df0..4ecc79d 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -225,16 +225,16 @@ def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), // def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_fpMOVSI, "fmrs", " $dst, $src", + IIC_VMOVSI, "fmrs", " $dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_fpMOVIS, "fmsr", " $dst, $src", + IIC_VMOVIS, "fmsr", " $dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; def FMRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src), - IIC_fpMOVDI, "fmrrd", " $dst1, $dst2, $src", + IIC_VMOVDI, "fmrrd", " $dst1, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]>; // FMDHR: GPR -> SPR @@ -242,7 +242,7 @@ def FMRRD : AVConv3I<0b11000101, 0b1011, def FMDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_fpMOVID, "fmdrr", " $dst, $src1, $src2", + IIC_VMOVID, "fmdrr", " $dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; // FMRDH: SPR -> GPR diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 4dc369a..fc4c5f5 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -63,10 +63,6 @@ def IIC_iStoresiu : InstrItinClass; def IIC_iStorem : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; -def IIC_fpMOVIS : InstrItinClass; -def IIC_fpMOVID : InstrItinClass; -def IIC_fpMOVSI : InstrItinClass; -def IIC_fpMOVDI : InstrItinClass; def IIC_fpUNA32 : InstrItinClass; def IIC_fpUNA64 : InstrItinClass; def IIC_fpCMP32 : InstrItinClass; @@ -102,11 +98,21 @@ def IIC_VUNAD : InstrItinClass; def IIC_VUNAQ : InstrItinClass; def IIC_VBIND : 
InstrItinClass; def IIC_VBINQ : InstrItinClass; +def IIC_VMOVImm : InstrItinClass; def IIC_VMOVD : InstrItinClass; def IIC_VMOVQ : InstrItinClass; +def IIC_VMOVIS : InstrItinClass; +def IIC_VMOVID : InstrItinClass; +def IIC_VMOVISL : InstrItinClass; +def IIC_VMOVSI : InstrItinClass; +def IIC_VMOVDI : InstrItinClass; def IIC_VPERMD : InstrItinClass; def IIC_VPERMQ : InstrItinClass; def IIC_VPERMQ3 : InstrItinClass; +def IIC_VMACD : InstrItinClass; +def IIC_VMACQ : InstrItinClass; +def IIC_VRECSD : InstrItinClass; +def IIC_VRECSQ : InstrItinClass; def IIC_VCNTiD : InstrItinClass; def IIC_VCNTiQ : InstrItinClass; def IIC_VUNAiD : InstrItinClass; @@ -119,10 +125,30 @@ def IIC_VSUBiD : InstrItinClass; def IIC_VSUBiQ : InstrItinClass; def IIC_VBINi4D : InstrItinClass; def IIC_VBINi4Q : InstrItinClass; +def IIC_VSHLiD : InstrItinClass; +def IIC_VSHLiQ : InstrItinClass; +def IIC_VSHLi4D : InstrItinClass; +def IIC_VSHLi4Q : InstrItinClass; +def IIC_VPALiD : InstrItinClass; +def IIC_VPALiQ : InstrItinClass; def IIC_VMULi16D : InstrItinClass; def IIC_VMULi32D : InstrItinClass; def IIC_VMULi16Q : InstrItinClass; def IIC_VMULi32Q : InstrItinClass; +def IIC_VMACi16D : InstrItinClass; +def IIC_VMACi32D : InstrItinClass; +def IIC_VMACi16Q : InstrItinClass; +def IIC_VMACi32Q : InstrItinClass; +def IIC_VEXTD : InstrItinClass; +def IIC_VEXTQ : InstrItinClass; +def IIC_VTB1 : InstrItinClass; +def IIC_VTB2 : InstrItinClass; +def IIC_VTB3 : InstrItinClass; +def IIC_VTB4 : InstrItinClass; +def IIC_VTBX1 : InstrItinClass; +def IIC_VTBX2 : InstrItinClass; +def IIC_VTBX3 : InstrItinClass; +def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. 
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 33af58a..e565813 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -171,23 +171,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // FP Special Register to Integer Register File Move InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe], 1>]>, - // - // Integer to Single-Precision FP Register File Move - InstrItinData<IIC_fpMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe], 1>]>, - // - // Integer to Double-Precision FP Register File Move - InstrItinData<IIC_fpMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe], 1>]>, - // - // Single-Precision FP to Integer Register File Move - InstrItinData<IIC_fpMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe], 1>], [20, 1]>, - // - // Double-Precision FP to Integer Register File Move - InstrItinData<IIC_fpMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe], 1>], [20, 20, 1]>, + InstrStage<1, [FU_NLSPipe]>]>, // // Single-precision FP Unary InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -385,6 +369,10 @@ def CortexA8Itineraries : ProcessorItineraries<[ InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [3]>, + // // Double-register Permute Move InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NLSPipe]>], [2, 1]>, @@ -395,6 +383,26 @@ def CortexA8Itineraries : ProcessorItineraries<[ InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<2, [FU_NLSPipe]>], [3, 1]>, // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>], [2, 1]>, + // + // Integer 
to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>], [20, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, + // // Double-register Permute InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>, @@ -413,15 +421,33 @@ def CortexA8Itineraries : ProcessorItineraries<[ InstrStage<1, [FU_NPipe], 0>, InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>, // + // Double-register FP Multiply-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>, + // + // Quad-register FP Multiply-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>, + // + // Double-register Reciprocal Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [9, 2, 2]>, + // + // Quad-register Reciprocal Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [10, 2, 2]>, + // // Double-register Integer Count InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2]>, + InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Count // Result written in N3, but that is relative to the last cycle of multicycle, // so we use 4 for those cases InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, -
InstrStage<2, [FU_NPipe]>], [4, 2]>, + InstrStage<2, [FU_NPipe]>], [4, 2, 2]>, // // Double-register Integer Unary InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -463,6 +489,30 @@ def CortexA8Itineraries : ProcessorItineraries<[ InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [4, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [5, 1, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, + // // Double-register Integer Multiply (.8, .16) InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, @@ -479,7 +529,59 @@ def CortexA8Itineraries : ProcessorItineraries<[ InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>, InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1]> - - + InstrStage<3, [FU_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, 
[FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NPipe]>, + InstrStage<2, [FU_NLSPipe], 0>, + InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>, + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrStage<1, [FU_NLSPipe]>, + InstrStage<1, [FU_NPipe], 0>, + InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; |