//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file describes the ARM NEON instruction set. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // NEON-specific Operands. //===----------------------------------------------------------------------===// def nModImm : Operand { let PrintMethod = "printNEONModImmOperand"; } def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; } def nImmSplatI8 : Operand { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmSplatI8AsmOperand; } def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; } def nImmSplatI16 : Operand { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmSplatI16AsmOperand; } def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; } def nImmSplatI32 : Operand { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmSplatI32AsmOperand; } def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } def nImmVMOVI32 : Operand { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } def nImmVMOVI32Neg : Operand { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32NegAsmOperand; } def nImmVMOVF32 : Operand { let PrintMethod = "printFPImmOperand"; let ParserMatchClass = FPImmOperand; } def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; } def nImmSplatI64 : Operand { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmSplatI64AsmOperand; } def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } def VectorIndex8 : Operand, ImmLeaf { let ParserMatchClass = VectorIndex8Operand; let PrintMethod = "printVectorIndex"; let MIOperandInfo = (ops i32imm); } def VectorIndex16 : Operand, ImmLeaf { let ParserMatchClass = VectorIndex16Operand; let PrintMethod = "printVectorIndex"; let MIOperandInfo = (ops i32imm); } def VectorIndex32 : Operand, ImmLeaf { let ParserMatchClass = VectorIndex32Operand; let PrintMethod = "printVectorIndex"; let MIOperandInfo = (ops i32imm); } // Register list of one D register. def VecListOneDAsmOperand : AsmOperandClass { let Name = "VecListOneD"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListOneD : RegisterOperand { let ParserMatchClass = VecListOneDAsmOperand; } // Register list of two sequential D registers. def VecListTwoDAsmOperand : AsmOperandClass { let Name = "VecListTwoD"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListTwoD : RegisterOperand { let ParserMatchClass = VecListTwoDAsmOperand; } // Register list of three sequential D registers. def VecListThreeDAsmOperand : AsmOperandClass { let Name = "VecListThreeD"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListThreeD : RegisterOperand { let ParserMatchClass = VecListThreeDAsmOperand; } // Register list of four sequential D registers. def VecListFourDAsmOperand : AsmOperandClass { let Name = "VecListFourD"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListFourD : RegisterOperand { let ParserMatchClass = VecListFourDAsmOperand; } // Register list of two D registers spaced by 2 (two sequential Q registers). def VecListTwoQAsmOperand : AsmOperandClass { let Name = "VecListTwoQ"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListTwoQ : RegisterOperand { let ParserMatchClass = VecListTwoQAsmOperand; } // Register list of three D registers spaced by 2 (three Q registers). def VecListThreeQAsmOperand : AsmOperandClass { let Name = "VecListThreeQ"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListThreeQ : RegisterOperand { let ParserMatchClass = VecListThreeQAsmOperand; } // Register list of three D registers spaced by 2 (three Q registers). def VecListFourQAsmOperand : AsmOperandClass { let Name = "VecListFourQ"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListFourQ : RegisterOperand { let ParserMatchClass = VecListFourQAsmOperand; } // Register list of one D register, with "all lanes" subscripting. def VecListOneDAllLanesAsmOperand : AsmOperandClass { let Name = "VecListOneDAllLanes"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListOneDAllLanes : RegisterOperand { let ParserMatchClass = VecListOneDAllLanesAsmOperand; } // Register list of two D registers, with "all lanes" subscripting. def VecListTwoDAllLanesAsmOperand : AsmOperandClass { let Name = "VecListTwoDAllLanes"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListTwoDAllLanes : RegisterOperand { let ParserMatchClass = VecListTwoDAllLanesAsmOperand; } // Register list of two D registers spaced by 2 (two sequential Q registers). def VecListTwoQAllLanesAsmOperand : AsmOperandClass { let Name = "VecListTwoQAllLanes"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListOperands"; } def VecListTwoQAllLanes : RegisterOperand { let ParserMatchClass = VecListTwoQAllLanesAsmOperand; } // Register list of one D register, with byte lane subscripting. def VecListOneDByteIndexAsmOperand : AsmOperandClass { let Name = "VecListOneDByteIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListOneDByteIndexed : Operand { let ParserMatchClass = VecListOneDByteIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with half-word lane subscripting. def VecListOneDHWordIndexAsmOperand : AsmOperandClass { let Name = "VecListOneDHWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListOneDHWordIndexed : Operand { let ParserMatchClass = VecListOneDHWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with word lane subscripting. def VecListOneDWordIndexAsmOperand : AsmOperandClass { let Name = "VecListOneDWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListOneDWordIndexed : Operand { let ParserMatchClass = VecListOneDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // Register list of two D registers with byte lane subscripting. def VecListTwoDByteIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoDByteIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListTwoDByteIndexed : Operand { let ParserMatchClass = VecListTwoDByteIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with half-word lane subscripting. def VecListTwoDHWordIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoDHWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListTwoDHWordIndexed : Operand { let ParserMatchClass = VecListTwoDHWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with word lane subscripting. def VecListTwoDWordIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoDWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListTwoDWordIndexed : Operand { let ParserMatchClass = VecListTwoDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // Register list of two Q registers with half-word lane subscripting. def VecListTwoQHWordIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoQHWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListTwoQHWordIndexed : Operand { let ParserMatchClass = VecListTwoQHWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with word lane subscripting. def VecListTwoQWordIndexAsmOperand : AsmOperandClass { let Name = "VecListTwoQWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListTwoQWordIndexed : Operand { let ParserMatchClass = VecListTwoQWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // Register list of three D registers with byte lane subscripting. def VecListThreeDByteIndexAsmOperand : AsmOperandClass { let Name = "VecListThreeDByteIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListThreeDByteIndexed : Operand { let ParserMatchClass = VecListThreeDByteIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with half-word lane subscripting. def VecListThreeDHWordIndexAsmOperand : AsmOperandClass { let Name = "VecListThreeDHWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListThreeDHWordIndexed : Operand { let ParserMatchClass = VecListThreeDHWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with word lane subscripting. def VecListThreeDWordIndexAsmOperand : AsmOperandClass { let Name = "VecListThreeDWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListThreeDWordIndexed : Operand { let ParserMatchClass = VecListThreeDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // Register list of three Q registers with half-word lane subscripting. def VecListThreeQHWordIndexAsmOperand : AsmOperandClass { let Name = "VecListThreeQHWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListThreeQHWordIndexed : Operand { let ParserMatchClass = VecListThreeQHWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with word lane subscripting. def VecListThreeQWordIndexAsmOperand : AsmOperandClass { let Name = "VecListThreeQWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListThreeQWordIndexed : Operand { let ParserMatchClass = VecListThreeQWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // Register list of four D registers with byte lane subscripting. def VecListFourDByteIndexAsmOperand : AsmOperandClass { let Name = "VecListFourDByteIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListFourDByteIndexed : Operand { let ParserMatchClass = VecListFourDByteIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with half-word lane subscripting. def VecListFourDHWordIndexAsmOperand : AsmOperandClass { let Name = "VecListFourDHWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListFourDHWordIndexed : Operand { let ParserMatchClass = VecListFourDHWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with word lane subscripting. def VecListFourDWordIndexAsmOperand : AsmOperandClass { let Name = "VecListFourDWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListFourDWordIndexed : Operand { let ParserMatchClass = VecListFourDWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // Register list of four Q registers with half-word lane subscripting. def VecListFourQHWordIndexAsmOperand : AsmOperandClass { let Name = "VecListFourQHWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListFourQHWordIndexed : Operand { let ParserMatchClass = VecListFourQHWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } // ...with word lane subscripting. def VecListFourQWordIndexAsmOperand : AsmOperandClass { let Name = "VecListFourQWordIndexed"; let ParserMethod = "parseVectorList"; let RenderMethod = "addVecListIndexedOperands"; } def VecListFourQWordIndexed : Operand { let ParserMatchClass = VecListFourQWordIndexAsmOperand; let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx); } //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; // Types for vector shift by immediates. The "SHX" version is for long and // narrow operations where the source and destination vectors have different // types. The "SHINS" version is for shift and insert operations. def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>]>; def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>; def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>; def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>; def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>; def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>; def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>; def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>; def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>; def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>; def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>; def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>; def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>; def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>; def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>; def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>; def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>; def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>; def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>; def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>; def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>; def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>; def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisVT<2, i32>]>; def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>; def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; def NEONvbsl : SDNode<"ARMISD::VBSL", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>>; def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, // so the result is not constrained to match the source. def NEONvduplane : SDNode<"ARMISD::VDUPLANE", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i32>]>>; def SDTARMVEXT : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>; def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>]>; def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>; def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>; def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>; def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 32 && EltVal == 0); }]>; def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{ ConstantSDNode *ConstVal = cast(N->getOperand(0)); unsigned EltBits = 0; uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits); return (EltBits == 8 && EltVal == 0xff); }]>; //===----------------------------------------------------------------------===// // NEON load / store instructions //===----------------------------------------------------------------------===// // Use VLDM to load a Q register as a D register pair. // This is a pseudo instruction that is expanded to VLDMD after reg alloc. def VLDMQIA : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. def VSTMQIA : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), IIC_fpStore_m, "", [(store (v2f64 QPR:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. class VLDQPseudo : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; class VLDQWBPseudo : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; class VLDQWBfixedPseudo : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr), itin, "$addr.addr = $wb">; class VLDQWBregisterPseudo : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, rGPR:$offset), itin, "$addr.addr = $wb">; class VLDQQPseudo : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; class VLDQQWBPseudo : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; class VLDQQWBfixedPseudo : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr), itin, "$addr.addr = $wb">; class VLDQQWBregisterPseudo : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, rGPR:$offset), itin, "$addr.addr = $wb">; class VLDQQQQPseudo : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin, "$src = $dst">; class VLDQQQQWBPseudo : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, "$addr.addr = $wb, $src = $dst">; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D op7_4, string Dt> : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), (ins addrmode6:$Rn), IIC_VLD1, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; def VLD1d16 : VLD1D<{0,1,0,?}, "16">; def VLD1d32 : VLD1D<{1,0,0,?}, "32">; def VLD1d64 : VLD1D<{1,1,0,?}, "64">; def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; def VLD1q8Pseudo : VLDQPseudo; def VLD1q16Pseudo : VLDQPseudo; def VLD1q32Pseudo : VLDQPseudo; def VLD1q64Pseudo : VLDQPseudo; // ...with address register writeback: multiclass VLD1DWB op7_4, string Dt> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QWB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo; def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo; def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo; def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo; def VLD1q8PseudoWB_register : VLDQWBregisterPseudo; def VLD1q16PseudoWB_register : VLDQWBregisterPseudo; def VLD1q32PseudoWB_register : VLDQWBregisterPseudo; def VLD1q64PseudoWB_register : VLDQWBregisterPseudo; // ...with 3 registers class VLD1D3 op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } multiclass VLD1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; def VLD1d64TPseudo : VLDQQPseudo; // ...with 4 registers class VLD1D4 op7_4, string Dt> : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } multiclass VLD1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins addrmode6:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; def VLD1d64QPseudo : VLDQQPseudo; // VLD2 : Vector Load (multiple 2-element structures) class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, InstrItinClass itin> : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), (ins addrmode6:$Rn), itin, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2>; def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2>; def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; def VLD2d8Pseudo : VLDQPseudo; def VLD2d16Pseudo : VLDQPseudo; def VLD2d32Pseudo : VLDQPseudo; def VLD2q8Pseudo : VLDQQPseudo; def VLD2q16Pseudo : VLDQQPseudo; def VLD2q32Pseudo : VLDQQPseudo; // ...with address register writeback: multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, InstrItinClass itin> { def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn), itin, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), itin, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VLD2u>; defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VLD2u>; defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VLD2u>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; def VLD2d8PseudoWB_fixed : VLDQWBfixedPseudo; def VLD2d16PseudoWB_fixed : VLDQWBfixedPseudo; def VLD2d32PseudoWB_fixed : VLDQWBfixedPseudo; def VLD2d8PseudoWB_register : VLDQWBregisterPseudo; def VLD2d16PseudoWB_register : VLDQWBregisterPseudo; def VLD2d32PseudoWB_register : VLDQWBregisterPseudo; def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo; def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo; def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo; // ...with double-spaced registers def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2>; def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2>; def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2>; defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VLD2u>; defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VLD2u>; defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VLD2u>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6:$Rn), IIC_VLD3, "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; def VLD3d8Pseudo : VLDQQPseudo; def VLD3d16Pseudo : VLDQQPseudo; def VLD3d32Pseudo : VLDQQPseudo; // ...with address register writeback: class VLD3DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; def VLD3d8Pseudo_UPD : VLDQQWBPseudo; def VLD3d16Pseudo_UPD : VLDQQWBPseudo; def VLD3d32Pseudo_UPD : VLDQQWBPseudo; // ...with double-spaced registers: def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; // ...alternate versions to be allocated odd register numbers: def VLD3q8oddPseudo : VLDQQQQPseudo; def VLD3q16oddPseudo : VLDQQQQPseudo; def VLD3q32oddPseudo : VLDQQQQPseudo; def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$Rn), IIC_VLD4, "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; def VLD4d8Pseudo : VLDQQPseudo; def VLD4d16Pseudo : VLDQQPseudo; def VLD4d32Pseudo : VLDQQPseudo; // ...with address register writeback: class VLD4DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; def VLD4d8Pseudo_UPD : VLDQQWBPseudo; def VLD4d16Pseudo_UPD : VLDQQWBPseudo; def VLD4d32Pseudo_UPD : VLDQQWBPseudo; // ...with double-spaced registers: def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; // ...alternate versions to be allocated odd register numbers: def VLD4q8oddPseudo : VLDQQQQPseudo; def VLD4q16oddPseudo : VLDQQQQPseudo; def VLD4q32oddPseudo : VLDQQQQPseudo; def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // Classes for VLD*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. class VLDQLNPseudo : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), itin, "$src = $dst">; class VLDQLNWBPseudo : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QPR:$src, nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; class VLDQQLNPseudo : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), itin, "$src = $dst">; class VLDQQLNWBPseudo : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; class VLDQQQQLNPseudo : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), itin, "$src = $dst">; class VLDQQQQLNWBPseudo : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; // VLD1LN : Vector Load (single element to one lane) class VLD1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", "$src = $Vd", [(set DPR:$Vd, (vector_insert (Ty DPR:$src), (i32 (LoadOp addrmode6:$Rn)), imm:$lane))]> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD1LN"; } class VLD1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane), IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", "$src = $Vd", [(set DPR:$Vd, (vector_insert (Ty DPR:$src), (i32 (LoadOp addrmode6oneL32:$Rn)), imm:$lane))]> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD1LN"; } class VLD1QLNPseudo : VLDQLNPseudo { let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), (i32 (LoadOp addrmode6:$addr)), imm:$lane))]; } def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { let Inst{7-5} = lane{2-0}; } def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { let Inst{7-6} = lane{1-0}; let Inst{5-4} = Rn{5-4}; } def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } def VLD1LNq8Pseudo : VLD1QLNPseudo; def VLD1LNq16Pseudo : VLD1QLNPseudo; def VLD1LNq32Pseudo : VLD1QLNPseudo; def : Pat<(vector_insert (v2f32 DPR:$src), (f32 (load addrmode6:$addr)), imm:$lane), (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; def : Pat<(vector_insert (v4f32 QPR:$src), (f32 (load addrmode6:$addr)), imm:$lane), (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: class VLD1LNWB op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", "$src = $Vd, $Rn.addr = $wb", []> { let DecoderMethod = "DecodeVLD1LN"; } def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; } def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{4}; } def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{4}; let Inst{4} = Rn{4}; } def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo; def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo; def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo; // VLD2LN : Vector Load (single 2-element structure to one lane) class VLD2LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2LN"; } def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; } def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } def VLD2LNd8Pseudo : VLDQLNPseudo; def VLD2LNd16Pseudo : VLDQLNPseudo; def VLD2LNd32Pseudo : VLDQLNPseudo; // ...with double-spaced registers: def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } def VLD2LNq16Pseudo : VLDQQLNPseudo; def VLD2LNq32Pseudo : VLDQQLNPseudo; // ...with address register writeback: class VLD2LNWB op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2LN"; } def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; } def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo; def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo; def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo; def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo; def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo; // VLD3LN : Vector Load (single 3-element structure to one lane) class VLD3LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD3LN"; } def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; } def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } def VLD3LNd8Pseudo : VLDQQLNPseudo; def VLD3LNd16Pseudo : VLDQQLNPseudo; def VLD3LNd32Pseudo : VLDQQLNPseudo; // ...with double-spaced registers: def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } def VLD3LNq16Pseudo : VLDQQQQLNPseudo; def VLD3LNq32Pseudo : VLDQQQQLNPseudo; // ...with address register writeback: class VLD3LNWB op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3lnu, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", []> { let DecoderMethod = "DecodeVLD3LN"; } def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; } def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo; def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo; def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo; def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN"; } def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; } def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } def VLD4LNd8Pseudo : VLDQQLNPseudo; def VLD4LNd16Pseudo : VLDQQLNPseudo; def VLD4LNd32Pseudo : VLDQQLNPseudo; // ...with double-spaced registers: def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } def VLD4LNq16Pseudo : VLDQQQQLNPseudo; def VLD4LNq32Pseudo : VLDQQQQLNPseudo; // ...with address register writeback: class VLD4LNWB op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VLD4lnu, "vld4", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN" ; } def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; } def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo; def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo; def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo; def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListOneDAllLanes:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } class VLD1QDUPPseudo : VLDQPseudo { let Pattern = [(set QPR:$dst, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))]; } def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; def VLD1DUPq8Pseudo : VLD1QDUPPseudo; def VLD1DUPq16Pseudo : VLD1QDUPPseudo; def VLD1DUPq32Pseudo : VLD1QDUPPseudo; def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPq32Pseudo addrmode6:$addr)>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { class VLD1QDUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd), (ins addrmode6dup:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">; def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">; def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">; // ...with address register writeback: multiclass VLD1DUPWB op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QDUPWB op7_4, string Dt> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListTwoDAllLanes:$Vd, GPR:$wb), (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; def VLD1DUPq8PseudoWB_fixed : VLDQWBfixedPseudo; def VLD1DUPq16PseudoWB_fixed : VLDQWBfixedPseudo; def VLD1DUPq32PseudoWB_fixed : VLDQWBfixedPseudo; def VLD1DUPq8PseudoWB_register : VLDQWBregisterPseudo; def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo; def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo; // VLD2DUP : Vector Load (single 2-element structure to all lanes) class VLD2DUP op7_4, string Dt, RegisterOperand VdTy> : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), (ins addrmode6dup:$Rn), IIC_VLD2dup, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListTwoDAllLanes>; def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>; def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>; def VLD2DUPd8Pseudo : VLDQPseudo; def VLD2DUPd16Pseudo : VLDQPseudo; def VLD2DUPd32Pseudo : VLDQPseudo; // ...with double-spaced registers (not used for codegen): def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListTwoQAllLanes>; def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>; def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>; // ...with address register writeback: multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6dup:$Rn), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } } defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListTwoDAllLanes>; defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListTwoDAllLanes>; defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListTwoDAllLanes>; defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListTwoQAllLanes>; defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListTwoQAllLanes>; defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListTwoQAllLanes>; def VLD2DUPd8PseudoWB_fixed : VLDQWBfixedPseudo ; def VLD2DUPd8PseudoWB_register : VLDQWBregisterPseudo; def VLD2DUPd16PseudoWB_fixed : VLDQWBfixedPseudo ; def VLD2DUPd16PseudoWB_register : VLDQWBregisterPseudo; def VLD2DUPd32PseudoWB_fixed : VLDQWBfixedPseudo ; def VLD2DUPd32PseudoWB_register : VLDQWBregisterPseudo; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6dup:$Rn), IIC_VLD3dup, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; } def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; def VLD3DUPd8Pseudo : VLDQQPseudo; def VLD3DUPd16Pseudo : VLDQQPseudo; def VLD3DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">; def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">; def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: class VLD3DUPWB op7_4, string Dt> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; } def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">; def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">; def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo; def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo; // VLD4DUP : Vector Load (single 4-element structure to all lanes) class VLD4DUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1111, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6dup:$Rn), IIC_VLD4dup, "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4DupInstruction"; } def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo : VLDQQPseudo; def VLD4DUPd16Pseudo : VLDQQPseudo; def VLD4DUPd32Pseudo : VLDQQPseudo; // ...with double-spaced registers (not used for codegen): def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">; def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">; def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } // ...with address register writeback: class VLD4DUPWB op7_4, string Dt> : NLdSt<1, 0b10, 0b1111, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4DupInstruction"; } def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">; def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">; def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">; def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">; def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo; def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // Classes for VST* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. class VSTQPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">; class VSTQWBPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, "$addr.addr = $wb">; class VSTQWBfixedPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, QPR:$src), itin, "$addr.addr = $wb">; class VSTQWBregisterPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, "$addr.addr = $wb">; class VSTQQPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; class VSTQQWBPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin, "$addr.addr = $wb">; class VSTQQWBfixedPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, QQPR:$src), itin, "$addr.addr = $wb">; class VSTQQWBregisterPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin, "$addr.addr = $wb">; class VSTQQQQPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">; class VSTQQQQWBPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, "$addr.addr = $wb">; // VST1 : Vector Store (multiple single elements) class VST1D op7_4, string Dt> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; def VST1d16 : VST1D<{0,1,0,?}, "16">; def VST1d32 : VST1D<{1,0,0,?}, "32">; def VST1d64 : VST1D<{1,1,0,?}, "64">; def VST1q8 : VST1Q<{0,0,?,?}, "8">; def VST1q16 : VST1Q<{0,1,?,?}, "16">; def VST1q32 : VST1Q<{1,0,?,?}, "32">; def VST1q64 : VST1Q<{1,1,?,?}, "64">; def VST1q8Pseudo : VSTQPseudo; def VST1q16Pseudo : VSTQPseudo; def VST1q32Pseudo : VSTQPseudo; def VST1q64Pseudo : VSTQPseudo; // ...with address register writeback: multiclass VST1DWB op7_4, string Dt> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } multiclass VST1QWB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">; defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">; defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">; defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">; defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">; defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo; def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo; def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo; def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo; def VST1q8PseudoWB_register : VSTQWBregisterPseudo; def VST1q16PseudoWB_register : VSTQWBregisterPseudo; def VST1q32PseudoWB_register : VSTQWBregisterPseudo; def VST1q64PseudoWB_register : VSTQWBregisterPseudo; // ...with 3 registers class VST1D3 op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } multiclass VST1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } def VST1d8T : VST1D3<{0,0,0,?}, "8">; def VST1d16T : VST1D3<{0,1,0,?}, "16">; def VST1d32T : VST1D3<{1,0,0,?}, "32">; def VST1d64T : VST1D3<{1,1,0,?}, "64">; defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; def VST1d64TPseudo : VSTQQPseudo; def VST1d64TPseudoWB_fixed : VSTQQWBPseudo; def VST1d64TPseudoWB_register : VSTQQWBPseudo; // ...with 4 registers class VST1D4 op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } multiclass VST1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } def VST1d8Q : VST1D4<{0,0,?,?}, "8">; def VST1d16Q : VST1D4<{0,1,?,?}, "16">; def VST1d32Q : VST1D4<{1,0,?,?}, "32">; def VST1d64Q : VST1D4<{1,1,?,?}, "64">; defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; def VST1d64QPseudo : VSTQQPseudo; def VST1d64QPseudoWB_fixed : VSTQQWBPseudo; def VST1d64QPseudoWB_register : VSTQQWBPseudo; // VST2 : Vector Store (multiple 2-element structures) class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, InstrItinClass itin> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListTwoD, IIC_VST2>; def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListTwoD, IIC_VST2>; def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListTwoD, IIC_VST2>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; def VST2d8Pseudo : VSTQPseudo; def VST2d16Pseudo : VSTQPseudo; def VST2d32Pseudo : VSTQPseudo; def VST2q8Pseudo : VSTQQPseudo; def VST2q16Pseudo : VSTQQPseudo; def VST2q32Pseudo : VSTQQPseudo; // ...with address register writeback: multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> { def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } multiclass VST2QWB op7_4, string Dt> { def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbFixed"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; let AsmMatchConverter = "cvtVSTwbRegister"; } } defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; def VST2d8PseudoWB_fixed : VSTQWBfixedPseudo; def VST2d16PseudoWB_fixed : VSTQWBfixedPseudo; def VST2d32PseudoWB_fixed : VSTQWBfixedPseudo; def VST2d8PseudoWB_register : VSTQWBregisterPseudo; def VST2d16PseudoWB_register : VSTQWBregisterPseudo; def VST2d32PseudoWB_register : VSTQWBregisterPseudo; def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo; def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo; def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo; def VST2q8PseudoWB_register : VSTQQWBregisterPseudo; def VST2q16PseudoWB_register : VSTQQWBregisterPseudo; def VST2q32PseudoWB_register : VSTQQWBregisterPseudo; // ...with double-spaced registers def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListTwoQ, IIC_VST2>; def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListTwoQ, IIC_VST2>; def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListTwoQ, IIC_VST2>; defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; // VST3 : Vector Store (multiple 3-element structures) class VST3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; def VST3d8Pseudo : VSTQQPseudo; def VST3d16Pseudo : VSTQQPseudo; def VST3d32Pseudo : VSTQQPseudo; // ...with address register writeback: class VST3DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; def VST3d8Pseudo_UPD : VSTQQWBPseudo; def VST3d16Pseudo_UPD : VSTQQWBPseudo; def VST3d32Pseudo_UPD : VSTQQWBPseudo; // ...with double-spaced registers: def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">; def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">; def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; // ...alternate versions to be allocated odd register numbers: def VST3q8oddPseudo : VSTQQQQPseudo; def VST3q16oddPseudo : VSTQQQQPseudo; def VST3q32oddPseudo : VSTQQQQPseudo; def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; // VST4 : Vector Store (multiple 4-element structures) class VST4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; def VST4d8Pseudo : VSTQQPseudo; def VST4d16Pseudo : VSTQQPseudo; def VST4d32Pseudo : VSTQQPseudo; // ...with address register writeback: class VST4DWB op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; def VST4d8Pseudo_UPD : VSTQQWBPseudo; def VST4d16Pseudo_UPD : VSTQQWBPseudo; def VST4d32Pseudo_UPD : VSTQQWBPseudo; // ...with double-spaced registers: def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">; def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">; def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; // ...alternate versions to be allocated odd register numbers: def VST4q8oddPseudo : VSTQQQQPseudo; def VST4q16oddPseudo : VSTQQQQPseudo; def VST4q32oddPseudo : VSTQQQQPseudo; def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 // Classes for VST*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. class VSTQLNPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), itin, "">; class VSTQLNWBPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QPR:$src, nohash_imm:$lane), itin, "$addr.addr = $wb">; class VSTQQLNPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), itin, "">; class VSTQQLNWBPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, nohash_imm:$lane), itin, "$addr.addr = $wb">; class VSTQQQQLNPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), itin, "">; class VSTQQQQLNWBPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, nohash_imm:$lane), itin, "$addr.addr = $wb">; // VST1LN : Vector Store (single element from one lane) class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } class VST1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{ let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } class VST1QLNPseudo : VSTQLNPseudo { let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr)]; } def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, NEONvgetlaneu> { let Inst{7-5} = lane{2-0}; } def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, NEONvgetlaneu> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } def VST1LNq8Pseudo : VST1QLNPseudo; def VST1LNq16Pseudo : VST1QLNPseudo; def VST1LNq32Pseudo : VST1QLNPseudo; def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; // ...with address register writeback: class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, PatFrag StoreOp, SDNode ExtractOp> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn, am6offset:$Rm))]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo : VSTQLNWBPseudo { let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr, am6offset:$offset))]; } def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, NEONvgetlaneu> { let Inst{7-5} = lane{2-0}; } def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, NEONvgetlaneu> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, extractelt> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo; def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo; def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo; let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; } def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; } def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } def VST2LNd8Pseudo : VSTQLNPseudo; def VST2LNd16Pseudo : VSTQLNPseudo; def VST2LNd32Pseudo : VSTQLNPseudo; // ...with double-spaced registers: def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{4}; } def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; let Inst{4} = Rn{4}; } def VST2LNq16Pseudo : VSTQQLNPseudo; def VST2LNq32Pseudo : VSTQQLNPseudo; // ...with address register writeback: class VST2LNWB op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; } def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; } def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo; def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo; def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo; def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo; def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo; // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { let Rm = 0b1111; let DecoderMethod = "DecodeVST3LN"; } def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; } def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } def VST3LNd8Pseudo : VSTQQLNPseudo; def VST3LNd16Pseudo : VSTQQLNPseudo; def VST3LNd32Pseudo : VSTQQLNPseudo; // ...with double-spaced registers: def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } def VST3LNq16Pseudo : VSTQQQQLNPseudo; def VST3LNq32Pseudo : VSTQQQQLNPseudo; // ...with address register writeback: class VST3LNWB op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3lnu, "vst3", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let DecoderMethod = "DecodeVST3LN"; } def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8"> { let Inst{7-5} = lane{2-0}; } def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo; def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo; def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo; def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; } def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST4LN"; } def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; } def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } def VST4LNd8Pseudo : VSTQQLNPseudo; def VST4LNd16Pseudo : VSTQQLNPseudo; def VST4LNd32Pseudo : VSTQQLNPseudo; // ...with double-spaced registers: def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } def VST4LNq16Pseudo : VSTQQQQLNPseudo; def VST4LNq32Pseudo : VSTQQQQLNPseudo; // ...with address register writeback: class VST4LNWB op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST4lnu, "vst4", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST4LN"; } def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8"> { let Inst{7-5} = lane{2-0}; } def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo; def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo; def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo; def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; } def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { let Inst{7} = lane{0}; let Inst{5} = Rn{5}; } def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; } // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 //===----------------------------------------------------------------------===// // NEON pattern fragments //===----------------------------------------------------------------------===// // Extract D sub-registers of Q registers. def DSubReg_i8_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32); }]>; def DSubReg_i16_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32); }]>; def DSubReg_i32_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32); }]>; def DSubReg_f64_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); }]>; // Extract S sub-registers of Q/D registers. def SSubReg_f32_reg : SDNodeXFormgetTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32); }]>; // Translate lane numbers from Q registers to D subregs. def SubReg_i8_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 7, MVT::i32); }]>; def SubReg_i16_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 3, MVT::i32); }]>; def SubReg_i32_lane : SDNodeXFormgetTargetConstant(N->getZExtValue() & 1, MVT::i32); }]>; //===----------------------------------------------------------------------===// // Instruction Classes //===----------------------------------------------------------------------===// // Basic 2-register operations: double- and quad-register. class N2VD op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; // Basic 2-register intrinsics, both double- and quad-register. class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Narrow 2-register operations. class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, SDNode OpNode> : N2V; // Narrow 2-register intrinsics. class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, Intrinsic IntOp> : N2V; // Long 2-register operations (currently only used for VMOVL). class N2VL op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> : N2V; // Long 2-register intrinsics. class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N2V; // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. class N2VDShuffle op19_18, bits<5> op11_7, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm), (ins DPR:$src1, DPR:$src2), IIC_VPERMD, OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd, $src2 = $Vm", []>; class N2VQShuffle op19_18, bits<5> op11_7, InstrItinClass itin, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm), (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd, $src2 = $Vm", []>; // Basic 3-register operations: double- and quad-register. class N3VD op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { let isCommutable = Commutable; } // Same as N3VD but no data type. class N3VDX op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX