diff options
Diffstat (limited to 'lib/Target/ARM/ARMInstrNEON.td')
| -rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 1521 |
1 files changed, 1187 insertions, 334 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 412b3ca..c40860d 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -39,6 +39,10 @@ def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } +def nImmVMOVF32 : Operand<i32> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; +} def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } def nImmSplatI64 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; @@ -70,13 +74,131 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ let MIOperandInfo = (ops i32imm); } +// Register list of one D register. def VecListOneDAsmOperand : AsmOperandClass { let Name = "VecListOneD"; let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; } def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { let ParserMatchClass = VecListOneDAsmOperand; } +// Register list of two sequential D registers. +def VecListTwoDAsmOperand : AsmOperandClass { + let Name = "VecListTwoD"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { + let ParserMatchClass = VecListTwoDAsmOperand; +} +// Register list of three sequential D registers. +def VecListThreeDAsmOperand : AsmOperandClass { + let Name = "VecListThreeD"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> { + let ParserMatchClass = VecListThreeDAsmOperand; +} +// Register list of four sequential D registers. 
+def VecListFourDAsmOperand : AsmOperandClass { + let Name = "VecListFourD"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> { + let ParserMatchClass = VecListFourDAsmOperand; +} +// Register list of two D registers spaced by 2 (two sequential Q registers). +def VecListTwoQAsmOperand : AsmOperandClass { + let Name = "VecListTwoQ"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListTwoQ : RegisterOperand<DPR, "printVectorListTwoSpaced"> { + let ParserMatchClass = VecListTwoQAsmOperand; +} + +// Register list of one D register, with "all lanes" subscripting. +def VecListOneDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListOneDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> { + let ParserMatchClass = VecListOneDAllLanesAsmOperand; +} +// Register list of two D registers, with "all lanes" subscripting. +def VecListTwoDAllLanesAsmOperand : AsmOperandClass { + let Name = "VecListTwoDAllLanes"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListOperands"; +} +def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> { + let ParserMatchClass = VecListTwoDAllLanesAsmOperand; +} + +// Register list of one D register, with byte lane subscripting. +def VecListOneDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. 
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. +def VecListOneDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListOneDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListOneDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListOneDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// Register list of two D registers, with byte lane subscripting. +def VecListTwoDByteIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDByteIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDByteIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDByteIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with half-word lane subscripting. +def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDHWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDHWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} +// ...with word lane subscripting. 
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass { + let Name = "VecListTwoDWordIndexed"; + let ParserMethod = "parseVectorList"; + let RenderMethod = "addVecListIndexedOperands"; +} +def VecListTwoDWordIndexed : Operand<i32> { + let ParserMatchClass = VecListTwoDWordIndexAsmOperand; + let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); +} //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -141,6 +263,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; +def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; @@ -227,12 +350,31 @@ class VLDQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; +class VLDQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + class VLDQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; class VLDQQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; +class VLDQQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; + + class VLDQQQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs 
QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, "$src = $dst">; @@ -245,17 +387,17 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd), + : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), (ins addrmode6:$Rn), IIC_VLD1, - "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, - "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; @@ -277,53 +419,90 @@ def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>; def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>; // ...with address register writeback: -class VLD1DWB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u, - "vld1", Dt, "\\{$Vd\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1DWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -class VLD1QWB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u, - "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">; -def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">; -def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">; -def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">; - -def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">; -def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">; -def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">; -def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">; - -def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; -def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; -def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; -def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>; - -// 
...with 3 registers (some of these are only for the disassembler): +defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; +defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; +defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; +defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; +defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; +defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; +defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; +defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; + +def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; +def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; +def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; +def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1x2u>; +def VLD1q8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; +def VLD1q16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; +def VLD1q32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; +def VLD1q64PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1x2u>; + +// ...with 3 registers class VLD1D3<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, - "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } -class VLD1D3WB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, - "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1D3WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; @@ -331,31 +510,40 @@ def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; -def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">; -def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">; -def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">; -def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">; +defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; +defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; +defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; +defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; -def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; -def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>; +def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>; -// ...with 4 registers (some of these are only for the disassembler): +// ...with 4 registers class VLD1D4<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, - "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -class VLD1D4WB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt, - "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", - []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1D4WB<bits<4> op7_4, string Dt> { + def _fixed : 
NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; @@ -363,40 +551,31 @@ def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; -def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">; -def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">; -def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">; -def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">; +defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; +defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; +defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; +defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; -def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; -def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>; +def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>; // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), - (ins addrmode6:$Rn), IIC_VLD2, - "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2Q<bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$Rn), IIC_VLD2x2, - "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { +class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + 
InstrItinClass itin> + : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), + (ins addrmode6:$Rn), itin, + "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">; -def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">; -def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; -def VLD2q8 : VLD2Q<{0,0,?,?}, "8">; -def VLD2q16 : VLD2Q<{0,1,?,?}, "16">; -def VLD2q32 : VLD2Q<{1,0,?,?}, "32">; +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>; def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>; @@ -407,47 +586,56 @@ def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>; def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>; // ...with address register writeback: -class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, - "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; -} -class VLD2QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, - "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy, InstrItinClass itin> { + def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn), 
itin, + "vld2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), itin, + "vld2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; -def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">; -def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">; - -def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">; -def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">; -def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">; - -def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; -def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>; - -def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; -def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>; - -// ...with double-spaced registers (for disassembly only): -def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">; -def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">; -def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">; -def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">; -def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">; -def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; + +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; + +def VLD2d8PseudoWB_fixed : 
VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD2u>; +def VLD2d8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; +def VLD2d16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; +def VLD2d32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD2u>; + +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>; + +// ...with double-spaced registers +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -907,9 +1095,11 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn), - IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "", - [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), + (ins addrmode6dup:$Rn), + IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", + [(set VecListOneDAllLanes:$Vd, + (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = 
"DecodeVLD1DupInstruction"; @@ -935,9 +1125,9 @@ def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { class VLD1QDUP<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2), + : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -948,32 +1138,63 @@ def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; // ...with address register writeback: -class VLD1DUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListOneDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -class VLD1QDUPWB<bits<4> op7_4, string Dt> - : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu, - "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLD1DupInstruction"; +multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<1, 0b10, 0b1100, op7_4, + (outs VecListTwoDAllLanes:$Vd, GPR:$wb), + (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLD1DupInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } -def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">; -def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">; -def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">; +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; -def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">; -def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">; -def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">; +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; -def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; -def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>; +def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo<IIC_VLD1dupu>; +def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; +def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; +def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP<bits<4> op7_4, string Dt> @@ -1123,6 +1344,14 @@ class VSTQWBPseudo<InstrItinClass itin> : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, "$addr.addr = $wb">; +class VSTQWBfixedPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QPR:$src), 
itin, + "$addr.addr = $wb">; +class VSTQWBregisterPseudo<InstrItinClass itin> + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, + "$addr.addr = $wb">; class VSTQQPseudo<InstrItinClass itin> : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; class VSTQQWBPseudo<InstrItinClass itin> @@ -1138,16 +1367,15 @@ class VSTQQQQWBPseudo<InstrItinClass itin> // VST1 : Vector Store (multiple single elements) class VST1D<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd), - IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> { + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), + IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2, - "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; @@ -1169,128 +1397,172 @@ def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>; def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>; // ...with address register writeback: -class VST1DWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u, - "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1DWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), + IIC_VLD1u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST1QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), - IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd), + IIC_VLD1x2u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">; -def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">; -def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">; -def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">; +defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">; +defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">; +defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">; +defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">; -def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">; -def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">; -def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">; -def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">; +defm VST1q8wb : VST1QWB<{0,0,?,?}, 
"8">; +defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; +defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; +defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; -def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; -def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; -def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; -def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>; +def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; +def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; +def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; +def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo<IIC_VST1x2u>; +def VST1q8PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; +def VST1q16PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; +def VST1q32PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; +def VST1q64PseudoWB_register : VSTQWBregisterPseudo<IIC_VST1x2u>; -// ...with 3 registers (some of these are only for the disassembler): +// ...with 3 registers class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + (ins addrmode6:$Rn, VecListThreeD:$Vd), + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D3WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3), - IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D3WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + IIC_VLD1x3u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8T : VST1D3<{0,0,0,?}, "8">; -def VST1d16T : VST1D3<{0,1,0,?}, "16">; -def VST1d32T : VST1D3<{1,0,0,?}, "32">; -def VST1d64T : VST1D3<{1,1,0,?}, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8">; +def VST1d16T : VST1D3<{0,1,0,?}, "16">; +def VST1d32T : VST1D3<{1,0,0,?}, "32">; +def VST1d64T : VST1D3<{1,1,0,?}, "64">; -def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">; -def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">; -def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">; -def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">; +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; -def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; -def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>; +def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>; -// ...with 4 registers (some of these are only for the disassembler): +// ...with 4 registers class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", + (ins addrmode6:$Rn, VecListFourD:$Vd), + IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -class VST1D4WB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, 
am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u, - "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1D4WB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1x4u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8Q : VST1D4<{0,0,?,?}, "8">; -def VST1d16Q : VST1D4<{0,1,?,?}, "16">; -def VST1d32Q : VST1D4<{1,0,?,?}, "32">; -def VST1d64Q : VST1D4<{1,1,?,?}, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8">; +def VST1d16Q : VST1D4<{0,1,?,?}, "16">; +def VST1d32Q : VST1D4<{1,0,?,?}, "32">; +def VST1d64Q : VST1D4<{1,1,?,?}, "64">; -def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">; -def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">; -def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">; -def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">; +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; -def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; -def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>; +def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>; // VST2 : Vector Store (multiple 2-element structures) -class VST2D<bits<4> op11_8, bits<4> op7_4, 
string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), - IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { - let Rm = 0b1111; - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; -} -class VST2Q<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []> { +class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, + InstrItinClass itin> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), + itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } -def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">; -def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">; -def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; -def VST2q8 : VST2Q<{0,0,?,?}, "8">; -def VST2q16 : VST2Q<{0,1,?,?}, "16">; -def VST2q32 : VST2Q<{1,0,?,?}, "32">; +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; def VST2d8Pseudo : VSTQPseudo<IIC_VST2>; def VST2d16Pseudo : VSTQPseudo<IIC_VST2>; @@ -1301,47 +1573,76 @@ def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>; def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>; // ...with address register writeback: -class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), - IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = 
"DecodeVSTInstruction"; +multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, + RegisterOperand VdTy> { + def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST2QWB<bits<4> op7_4, string Dt> - : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, - DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u, - "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST2QWB<bits<4> op7_4, string Dt> { + def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + IIC_VLD1u, + "vst2", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">; -def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">; -def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">; - -def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">; -def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">; -def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">; - -def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; -def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>; - -def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; -def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>; - -// ...with double-spaced registers (for disassembly only): -def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">; -def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">; -def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">; -def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">; -def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">; -def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; + +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; + +def VST2d8PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d32PseudoWB_fixed : VSTQWBPseudo<IIC_VST2u>; +def VST2d8PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def VST2d16PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; +def 
VST2d32PseudoWB_register : VSTQWBPseudo<IIC_VST2u>; + +def VST2q8PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_fixed : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q8PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q16PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; +def VST2q32PseudoWB_register : VSTQQWBPseudo<IIC_VST2x2u>; + +// ...with double-spaced registers +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1615,10 +1916,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>; // ...with address register writeback: class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, am6offset:$offset, - DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, - "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", - "$addr.addr = $wb", []> { + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, + "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; } @@ -2447,9 +2748,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Long shift by immediate. 
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm, + (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2679,14 +2980,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, v4i32, v4i32, OpNode, Commutable>; } -multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> { - def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"), - v2i32, ShOp>; - def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), - v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"), +multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> { + def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>; + def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32", v4i32, v2i32, ShOp>; } @@ -3351,15 +3649,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, SDNode OpNode> { def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> { + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "16"), v4i32, 
v4i16, OpNode> { + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, - OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> { + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -3448,7 +3746,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>; def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; @@ -3942,12 +4240,12 @@ def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", @@ -3956,12 +4254,12 @@ def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; // VBIT/VBIF are not yet implemented. 
The TwoAddress pass will not go looking // for equivalent operations with different register constraints; it just @@ -4159,18 +4457,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, - ValueType OpTy, SDNode OpNode> + ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt, - ResTy, OpTy, OpNode> { + ResTy, OpTy, ImmTy, OpNode> { let Inst{21-16} = op21_16; let DecoderMethod = "DecodeVSHLMaxInstruction"; } def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", - v8i16, v8i8, NEONvshlli>; + v8i16, v8i8, imm8, NEONvshlli>; def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", - v4i32, v4i16, NEONvshlli>; + v4i32, v4i16, imm16, NEONvshlli>; def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", - v2i64, v2i32, NEONvshlli>; + v2i64, v2i32, imm32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", @@ -4328,7 +4626,7 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; -// Vector Swap -- for disassembly only. 
+// Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, "vswp", "$Vd, $Vm", "", []>; @@ -4392,6 +4690,15 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), (ins nImmSplatI64:$SIMM), IIC_VMOVImm, "vmov", "i64", "$Vd, $SIMM", "", [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; + +def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>; +def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nImmVMOVF32:$SIMM), IIC_VMOVImm, + "vmov", "f32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; } // isReMaterializable // VMOV : Vector Get Lane (move scalar to ARM core register) @@ -4680,6 +4987,7 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; // VCVT : Vector Convert Between Floating-Point and Fixed-Point. +let DecoderMethod = "DecodeVCVTD" in { def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", @@ -4688,7 +4996,9 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; +} +let DecoderMethod = "DecodeVCVTQ" in { def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", @@ -4697,6 +5007,7 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; +} // VCVT : Vector Convert Between Half-Precision and Single-Precision. 
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, @@ -4789,34 +5100,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>; // VEXT : Vector Extract -class VEXTd<string OpcodeStr, string Dt, ValueType Ty> +class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm, + (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm, IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), - (Ty DPR:$Vm), imm:$index)))]> { + (Ty DPR:$Vm), imm:$index)))]> { bits<4> index; let Inst{11-8} = index{3-0}; } -class VEXTq<string OpcodeStr, string Dt, ValueType Ty> +class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd), - (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm, + (ins QPR:$Vn, QPR:$Vm, immTy:$index), /* index operand class comes from the immTy parameter, as in VEXTd */ NVExtFrm, IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn), - (Ty QPR:$Vm), imm:$index)))]> { + (Ty QPR:$Vm), imm:$index)))]> { bits<4> index; let Inst{11-8} = index{3-0}; } -def VEXTd8 : VEXTd<"vext", "8", v8i8> { +def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { let Inst{11-8} = index{3-0}; } -def VEXTd16 : VEXTd<"vext", "16", v4i16> { +def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { let Inst{11-9} = index{2-0}; let Inst{8} = 0b0; } -def VEXTd32 : VEXTd<"vext", "32", v2i32> { +def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } @@ -4825,17 +5136,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (i32 imm:$index))), (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>; -def VEXTq8 : VEXTq<"vext", "8", v16i8> { +def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> { let Inst{11-8} = index{3-0}; } -def VEXTq16 : VEXTq<"vext", "16", v8i16> { +def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> { let Inst{11-9} = index{2-0}; let Inst{8} = 0b0; } -def VEXTq32 : VEXTq<"vext", "32", v4i32> { +def
VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> { let Inst{11-10} = index{1-0}; let Inst{9-8} = 0b00; } +def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> { + let Inst{11} = index{0}; + let Inst{10-8} = 0b000; +} def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))), @@ -4883,17 +5198,17 @@ def VTBL1 let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>; + (ins VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>; + (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3, + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd), - (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), + (ins VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>; + "vtbl", "8", "$Vd, $Vn, $Vm", "", []>; } // hasExtraSrcRegAllocReq = 1 def VTBL2Pseudo @@ -4906,25 +5221,25 @@ def VTBL4Pseudo // VTBX : Vector Table Extension def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, - "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd", + (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1 - DPR:$orig, DPR:$Vn, DPR:$Vm)))]>; + DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>; + (ins DPR:$orig, VecListTwoD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2, + "vtbx", "8", 
"$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), + (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; def VTBX4 - : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4, - "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", + : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), + (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>; } // hasExtraSrcRegAllocReq = 1 @@ -5058,3 +5373,541 @@ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; + + +//===----------------------------------------------------------------------===// +// Assembler aliases +// + +def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>; +def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn", + (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>; + + +// VADD two-operand aliases. 
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm", + (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm", + (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm", + (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm", + (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm", + (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSUB two-operand aliases. 
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm", + (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm", + (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm", + (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm", + (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm", + (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VADDW two-operand aliases. +def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm", + (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm", + (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm", + (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm", + (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm", + (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm", + (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>; + +// VAND/VBIC/VEOR/VORR accept but do not require a type suffix. 
+defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm", + (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +// ... two-operand aliases +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vand${p} $Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vbic${p} $Vdn, $Vm", + (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"veor${p} $Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vorr${p} $Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vand${p}", "$Vdn, $Vm", + (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vdn, $Vm", + (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +defm : 
VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", + (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VMUL two-operand aliases. +def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm", + (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm", + (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm", + (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm", + (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm", + (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm", + (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm", + (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm", + (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm", + (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm", + (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane", + (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane", + (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm, + VectorIndex16:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane", + (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane", + (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; + +def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane", + (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm, + VectorIndex32:$lane, pred:$p)>; +def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane", + (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm, + 
VectorIndex32:$lane, pred:$p)>; + +// VQADD (register) two-operand aliases. +def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm", + (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm", + (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm", + (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm", + (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm", + (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm", + (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm", + (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm", + (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHL (immediate) two-operand aliases. 
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm", + (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm", + (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm", + (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm", + (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>; + +// VSHL (register) two-operand aliases. +def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm", + (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm", + (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm", + (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, 
pred:$p)>; +def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm", + (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm", + (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm", + (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm", + (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm", + (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// VSHR (immediate) two-operand aliases. +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm", + (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm", + (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm", + (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm", + (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm", + (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>; +def : 
NEONInstAlias<"vshr${p}.u16 $Vdn, $imm", + (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm", + (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>; +def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm", + (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>; + +// VLD1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. +defm VLD1LNdAsm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdAsm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdAsm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VLD1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD1LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vld1${p}", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST1 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+defm VST1LNdAsm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdAsm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdAsm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VST1LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr!", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST1LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vst1${p}", "$list, $addr, $Rm", + (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VLD2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+defm VLD2LNdAsm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdAsm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdAsm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VLD2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VLD2LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vld2${p}", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + + +// VST2 single-lane pseudo-instructions. These need special handling for +// the lane index that an InstAlias can't handle, so we use these instead. 
+defm VST2LNdAsm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdAsm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdAsm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + +defm VST2LNdWB_fixed_Asm : NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_fixed_Asm : NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_fixed_Asm : NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr!", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT8AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT16AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; +defm VST2LNdWB_register_Asm : + NEONDT32AsmPseudoInst<"vst2${p}", "$list, $addr, $Rm", + (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + rGPR:$Rm, pred:$p)>; + +// VMOV takes an optional datatype suffix +defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; + +// VCLE (register) is an assembler alias for VCGE w/ the operands reversed. +// D-register versions. 
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", + (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm", + (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm", + (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm", + (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm", + (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm", + (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm", + (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. +def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm", + (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm", + (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm", + (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm", + (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm", + (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm", + (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm", + (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// VCLT (register) is an assembler alias for VCGT w/ the operands reversed. +// D-register versions. 
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm", + (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm", + (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm", + (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm", + (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm", + (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm", + (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm", + (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>; +// Q-register versions. +def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", + (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm", + (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm", + (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm", + (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm", + (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", + (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", + (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; + +// Two-operand variants for VEXT +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm", + (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>; + +def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm", + (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, 
$imm", + (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm", + (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>; +def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm", + (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>; + +// Two-operand variants for VQDMULH +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; + +def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm", + (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm", + (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; + +// 'gas' compatibility aliases for quad-word instructions. Strictly speaking, +// these should restrict to just the Q register variants, but the register +// classes are enough to match correctly regardless, so we keep it simple +// and just use MnemonicAlias. +def : NEONMnemonicAlias<"vbicq", "vbic">; +def : NEONMnemonicAlias<"vandq", "vand">; +def : NEONMnemonicAlias<"veorq", "veor">; +def : NEONMnemonicAlias<"vorrq", "vorr">; + +def : NEONMnemonicAlias<"vmovq", "vmov">; +def : NEONMnemonicAlias<"vmvnq", "vmvn">; +// Explicit versions for floating point so that the FPImm variants get +// handled early. The parser gets confused otherwise. 
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">; +def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">; + +def : NEONMnemonicAlias<"vaddq", "vadd">; +def : NEONMnemonicAlias<"vsubq", "vsub">; + +def : NEONMnemonicAlias<"vminq", "vmin">; +def : NEONMnemonicAlias<"vmaxq", "vmax">; + +def : NEONMnemonicAlias<"vmulq", "vmul">; + +def : NEONMnemonicAlias<"vabsq", "vabs">; + +def : NEONMnemonicAlias<"vshlq", "vshl">; +def : NEONMnemonicAlias<"vshrq", "vshr">; + +def : NEONMnemonicAlias<"vcvtq", "vcvt">; + +def : NEONMnemonicAlias<"vcleq", "vcle">; +def : NEONMnemonicAlias<"vceqq", "vceq">; |
