diff options
| author | Rafael Espindola <rafael.espindola@gmail.com> | 2013-10-10 15:15:17 +0000 | 
|---|---|---|
| committer | Rafael Espindola <rafael.espindola@gmail.com> | 2013-10-10 15:15:17 +0000 | 
| commit | 812ddcc50f8bc3ec6ce115863ff2263815906aaf (patch) | |
| tree | 09d78a3a26f09e84735e4195e566584f0996b91a | |
| parent | d622bef31d11a5a6429fe7fad557c9b111e96f69 (diff) | |
| download | external_llvm-812ddcc50f8bc3ec6ce115863ff2263815906aaf.zip external_llvm-812ddcc50f8bc3ec6ce115863ff2263815906aaf.tar.gz external_llvm-812ddcc50f8bc3ec6ce115863ff2263815906aaf.tar.bz2 | |
Revert "Implement AArch64 vector load/store multiple N-element structure class SIMD(lselem). Including following 14 instructions: 4 ld1 insts: load multiple 1-element structure to sequential 1/2/3/4 registers. ld2/ld3/ld4: load multiple N-element structure to sequential N registers (N=2,3,4). 4 st1 insts: store multiple 1-element structure from sequential 1/2/3/4 registers. st2/st3/st4: store multiple N-element structure from sequential N registers (N = 2,3,4)."
This reverts commit r192352. It broke the build.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192354 91177308-0d34-0410-b5e6-96231b3b80d8
| -rw-r--r-- | include/llvm/CodeGen/ValueTypes.h | 2 | ||||
| -rw-r--r-- | lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 298 | ||||
| -rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.cpp | 54 | ||||
| -rw-r--r-- | lib/Target/AArch64/AArch64ISelLowering.h | 4 | ||||
| -rw-r--r-- | lib/Target/AArch64/AArch64InstrFormats.td | 18 | ||||
| -rw-r--r-- | lib/Target/AArch64/AArch64InstrNEON.td | 126 | ||||
| -rw-r--r-- | lib/Target/AArch64/AArch64RegisterInfo.td | 101 | ||||
| -rw-r--r-- | lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 166 | ||||
| -rw-r--r-- | lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 53 | ||||
| -rw-r--r-- | lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 30 | ||||
| -rw-r--r-- | lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 3 | ||||
| -rw-r--r-- | lib/Target/AArch64/Utils/AArch64BaseInfo.h | 44 | ||||
| -rw-r--r-- | test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll | 1228 | ||||
| -rw-r--r-- | test/MC/AArch64/neon-diagnostics.s | 221 | ||||
| -rw-r--r-- | test/MC/AArch64/neon-simd-ldst-multi-elem.s | 463 | 
15 files changed, 3 insertions, 2808 deletions
| diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index 79f3233..2e8f637 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -208,7 +208,7 @@ namespace llvm {      bool is64BitVector() const {        return (SimpleTy == MVT::v8i8  || SimpleTy == MVT::v4i16 ||                SimpleTy == MVT::v2i32 || SimpleTy == MVT::v1i64 || -              SimpleTy == MVT::v1f64 || SimpleTy == MVT::v2f32); +              SimpleTy == MVT::v2f32);      }      /// is128BitVector - Return true if this is a 128-bit vector type. diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 3b0dd64..a865564 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -109,23 +109,6 @@ public:    SDNode* Select(SDNode*);  private: -  /// Select NEON load intrinsics.  NumVecs should be 1, 2, 3 or 4. -  SDNode *SelectVLD(SDNode *N, unsigned NumVecs, const uint16_t *Opcode); - -  /// Select NEON store intrinsics.  NumVecs should be 1, 2, 3 or 4. -  SDNode *SelectVST(SDNode *N, unsigned NumVecs, const uint16_t *Opcodes); - -  // Form pairs of consecutive 64-bit/128-bit registers. -  SDNode *createDPairNode(SDValue V0, SDValue V1); -  SDNode *createQPairNode(SDValue V0, SDValue V1); - -  // Form sequences of 3 consecutive 64-bit/128-bit registers. -  SDNode *createDTripleNode(SDValue V0, SDValue V1, SDValue V2); -  SDNode *createQTripleNode(SDValue V0, SDValue V1, SDValue V2); - -  // Form sequences of 4 consecutive 64-bit/128-bit registers. -  SDNode *createDQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3); -  SDNode *createQQuadNode(SDValue V0, SDValue V1, SDValue V2, SDValue V3);  };  } @@ -407,221 +390,6 @@ SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,                                &Ops[0], Ops.size());  } -SDNode *AArch64DAGToDAGISel::createDPairNode(SDValue V0, SDValue V1) { -  SDLoc dl(V0.getNode()); -  SDValue RegClass = -      CurDAG->getTargetConstant(AArch64::DPairRegClassID, MVT::i32); -  SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32); -  SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32); -  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; -  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v2i64, -                                Ops); -} - -SDNode *AArch64DAGToDAGISel::createQPairNode(SDValue V0, SDValue V1) { -  SDLoc dl(V0.getNode()); -  SDValue RegClass = -      CurDAG->getTargetConstant(AArch64::QPairRegClassID, MVT::i32); -  SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32); -  SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32); -  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; -  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64, -                                Ops); -} - -SDNode *AArch64DAGToDAGISel::createDTripleNode(SDValue V0, SDValue V1, -                                               SDValue V2) { -  SDLoc dl(V0.getNode()); -  SDValue RegClass = -      CurDAG->getTargetConstant(AArch64::DTripleRegClassID, MVT::i32); -  SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32); -  SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32); -  SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32); -  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 }; -  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, -                                Ops); -} - -SDNode *AArch64DAGToDAGISel::createQTripleNode(SDValue V0, SDValue V1, -                                               SDValue V2) { -  SDLoc dl(V0.getNode()); -  SDValue RegClass = -      CurDAG->getTargetConstant(AArch64::QTripleRegClassID, MVT::i32); -  SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32); -  SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32); -  SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32); -  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2 }; -  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, -                                Ops); -} - -SDNode *AArch64DAGToDAGISel::createDQuadNode(SDValue V0, SDValue V1, SDValue V2, -                                             SDValue V3) { -  SDLoc dl(V0.getNode()); -  SDValue RegClass = -      CurDAG->getTargetConstant(AArch64::DQuadRegClassID, MVT::i32); -  SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::dsub_0, MVT::i32); -  SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::dsub_1, MVT::i32); -  SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::dsub_2, MVT::i32); -  SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::dsub_3, MVT::i32); -  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2,  V3, -                          SubReg3 }; -  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v4i64, -                                Ops); -} - -SDNode *AArch64DAGToDAGISel::createQQuadNode(SDValue V0, SDValue V1, SDValue V2, -                                             SDValue V3) { -  SDLoc dl(V0.getNode()); -  SDValue RegClass = -      CurDAG->getTargetConstant(AArch64::QQuadRegClassID, MVT::i32); -  SDValue SubReg0 = CurDAG->getTargetConstant(AArch64::qsub_0, MVT::i32); -  SDValue SubReg1 = CurDAG->getTargetConstant(AArch64::qsub_1, MVT::i32); -  SDValue SubReg2 = CurDAG->getTargetConstant(AArch64::qsub_2, MVT::i32); -  SDValue SubReg3 = CurDAG->getTargetConstant(AArch64::qsub_3, MVT::i32); -  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, V2, SubReg2,  V3, -                          SubReg3 }; -  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::v8i64, -                                Ops); -} - -SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, -                                       const uint16_t *Opcodes) { -  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); - -  EVT VT = N->getValueType(0); -  unsigned OpcodeIndex; -  switch (VT.getSimpleVT().SimpleTy) { -  default: llvm_unreachable("unhandled vector load type"); -  case MVT::v8i8:  OpcodeIndex = 0; break; -  case MVT::v4i16: OpcodeIndex = 1; break; -  case MVT::v2f32: -  case MVT::v2i32: OpcodeIndex = 2; break; -  case MVT::v1f64: -  case MVT::v1i64: OpcodeIndex = 3; break; -  case MVT::v16i8: OpcodeIndex = 4; break; -  case MVT::v8f16: -  case MVT::v8i16: OpcodeIndex = 5; break; -  case MVT::v4f32: -  case MVT::v4i32: OpcodeIndex = 6; break; -  case MVT::v2f64: -  case MVT::v2i64: OpcodeIndex = 7; break; -  } -  unsigned Opc = Opcodes[OpcodeIndex]; - -  SmallVector<SDValue, 2> Ops; -  Ops.push_back(N->getOperand(2)); // Push back the Memory Address -  Ops.push_back(N->getOperand(0)); // Push back the Chain - -  std::vector<EVT> ResTys; -  bool is64BitVector = VT.is64BitVector(); - -  if (NumVecs == 1) -    ResTys.push_back(VT); -  else if (NumVecs == 3) -    ResTys.push_back(MVT::Untyped); -  else { -    EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, -                                 is64BitVector ? NumVecs : NumVecs * 2); -    ResTys.push_back(ResTy); -  } - -  ResTys.push_back(MVT::Other); // Type of the Chain -  SDLoc dl(N); -  SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - -  // Transfer memoperands. -  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); -  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); -  cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); - -  if (NumVecs == 1) -    return VLd; - -  // If NumVecs > 1, the return result is a super register containing 2-4 -  // consecutive vector registers. -  SDValue SuperReg = SDValue(VLd, 0); - -  unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0; -  for (unsigned Vec = 0; Vec < NumVecs; ++Vec) -    ReplaceUses(SDValue(N, Vec), -                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); -  // Update users of the Chain -  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); - -  return NULL; -} - -SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, -                                       const uint16_t *Opcodes) { -  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); -  SDLoc dl(N); - -  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); -  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); - -  unsigned Vec0Idx = 3; -  EVT VT = N->getOperand(Vec0Idx).getValueType(); -  unsigned OpcodeIndex; -  switch (VT.getSimpleVT().SimpleTy) { -  default: llvm_unreachable("unhandled vector store type"); -  case MVT::v8i8:  OpcodeIndex = 0; break; -  case MVT::v4i16: OpcodeIndex = 1; break; -  case MVT::v2f32: -  case MVT::v2i32: OpcodeIndex = 2; break; -  case MVT::v1f64: -  case MVT::v1i64: OpcodeIndex = 3; break; -  case MVT::v16i8: OpcodeIndex = 4; break; -  case MVT::v8f16: -  case MVT::v8i16: OpcodeIndex = 5; break; -  case MVT::v4f32: -  case MVT::v4i32: OpcodeIndex = 6; break; -  case MVT::v2f64: -  case MVT::v2i64: OpcodeIndex = 7; break; -  } -  unsigned Opc = Opcodes[OpcodeIndex]; - -  std::vector<EVT> ResTys; -  ResTys.push_back(MVT::Other); // Type for the Chain - -  SmallVector<SDValue, 6> Ops; -  Ops.push_back(N->getOperand(2)); // Push back the Memory Address - -  bool is64BitVector = VT.is64BitVector(); - -  SDValue V0 = N->getOperand(Vec0Idx + 0); -  SDValue SrcReg; -  if (NumVecs == 1) -    SrcReg = V0; -  else { -    SDValue V1 = N->getOperand(Vec0Idx + 1); -    if (NumVecs == 2) -      SrcReg = is64BitVector ? SDValue(createDPairNode(V0, V1), 0) -                             : SDValue(createQPairNode(V0, V1), 0); -    else { -      SDValue V2 = N->getOperand(Vec0Idx + 2); -      if (NumVecs == 3) -        SrcReg = is64BitVector ? SDValue(createDTripleNode(V0, V1, V2), 0) -                               : SDValue(createQTripleNode(V0, V1, V2), 0); -      else { -        SDValue V3 = N->getOperand(Vec0Idx + 3); -        SrcReg = is64BitVector ? SDValue(createDQuadNode(V0, V1, V2, V3), 0) -                               : SDValue(createQQuadNode(V0, V1, V2, V3), 0); -      } -    } -  } -  Ops.push_back(SrcReg); - -  // Push back the Chain -  Ops.push_back(N->getOperand(0)); - -  // Transfer memoperands. -  SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); -  cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); - -  return VSt; -} -  SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {    // Dump information about the Node being selected    DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); @@ -768,72 +536,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {      Node = ResNode;      break;    } -  case ISD::INTRINSIC_VOID: -  case ISD::INTRINSIC_W_CHAIN: { -    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); -    switch (IntNo) { -    default: -      break; - -    case Intrinsic::arm_neon_vld1: { -      static const uint16_t Opcodes[] = { AArch64::LD1_8B,  AArch64::LD1_4H, -                                          AArch64::LD1_2S,  AArch64::LD1_1D, -                                          AArch64::LD1_16B, AArch64::LD1_8H, -                                          AArch64::LD1_4S,  AArch64::LD1_2D }; -      return SelectVLD(Node, 1, Opcodes); -    } -    case Intrinsic::arm_neon_vld2: { -      static const uint16_t Opcodes[] = { AArch64::LD2_8B,  AArch64::LD2_4H, -                                          AArch64::LD2_2S,  AArch64::LD1_2V_1D, -                                          AArch64::LD2_16B, AArch64::LD2_8H, -                                          AArch64::LD2_4S,  AArch64::LD2_2D }; -      return SelectVLD(Node, 2, Opcodes); -    } -    case Intrinsic::arm_neon_vld3: { -      static const uint16_t Opcodes[] = { AArch64::LD3_8B,  AArch64::LD3_4H, -                                          AArch64::LD3_2S,  AArch64::LD1_3V_1D, -                                          AArch64::LD3_16B, AArch64::LD3_8H, -                                          AArch64::LD3_4S,  AArch64::LD3_2D }; -      return SelectVLD(Node, 3, Opcodes); -    } -    case Intrinsic::arm_neon_vld4: { -      static const uint16_t Opcodes[] = { AArch64::LD4_8B,  AArch64::LD4_4H, -                                          AArch64::LD4_2S,  AArch64::LD1_4V_1D, -                                          AArch64::LD4_16B, AArch64::LD4_8H, -                                          AArch64::LD4_4S,  AArch64::LD4_2D }; -      return SelectVLD(Node, 4, Opcodes); -    } -    case Intrinsic::arm_neon_vst1: { -      static const uint16_t Opcodes[] = { AArch64::ST1_8B,  AArch64::ST1_4H, -                                          AArch64::ST1_2S,  AArch64::ST1_1D, -                                          AArch64::ST1_16B, AArch64::ST1_8H, -                                          AArch64::ST1_4S,  AArch64::ST1_2D }; -      return SelectVST(Node, 1, Opcodes); -    } -    case Intrinsic::arm_neon_vst2: { -      static const uint16_t Opcodes[] = { AArch64::ST2_8B,  AArch64::ST2_4H, -                                          AArch64::ST2_2S,  AArch64::ST1_2V_1D, -                                          AArch64::ST2_16B, AArch64::ST2_8H, -                                          AArch64::ST2_4S,  AArch64::ST2_2D }; -      return SelectVST(Node, 2, Opcodes); -    } -    case Intrinsic::arm_neon_vst3: { -      static const uint16_t Opcodes[] = { AArch64::ST3_8B,  AArch64::ST3_4H, -                                          AArch64::ST3_2S,  AArch64::ST1_3V_1D, -                                          AArch64::ST3_16B, AArch64::ST3_8H, -                                          AArch64::ST3_4S,  AArch64::ST3_2D }; -      return SelectVST(Node, 3, Opcodes); -    } -    case Intrinsic::arm_neon_vst4: { -      static const uint16_t Opcodes[] = { AArch64::ST4_8B,  AArch64::ST4_4H, -                                          AArch64::ST4_2S,  AArch64::ST1_4V_1D, -                                          AArch64::ST4_16B, AArch64::ST4_8H, -                                          AArch64::ST4_4S,  AArch64::ST4_2D }; -      return SelectVST(Node, 4, Opcodes); -    } -    } -    break; -  }    default:      break; // Let generic code handle it    } diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index d89213c..d70548a 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3681,57 +3681,3 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(    // constraint into a member of a register class.    return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);  } - -/// Represent NEON load and store intrinsics as MemIntrinsicNodes. -/// The associated MachineMemOperands record the alignment specified -/// in the intrinsic calls. -bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, -                                               const CallInst &I, -                                               unsigned Intrinsic) const { -  switch (Intrinsic) { -  case Intrinsic::arm_neon_vld1: -  case Intrinsic::arm_neon_vld2: -  case Intrinsic::arm_neon_vld3: -  case Intrinsic::arm_neon_vld4: { -    Info.opc = ISD::INTRINSIC_W_CHAIN; -    // Conservatively set memVT to the entire set of vectors loaded. -    uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; -    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); -    Info.ptrVal = I.getArgOperand(0); -    Info.offset = 0; -    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); -    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); -    Info.vol = false; // volatile loads with NEON intrinsics not supported -    Info.readMem = true; -    Info.writeMem = false; -    return true; -  } -  case Intrinsic::arm_neon_vst1: -  case Intrinsic::arm_neon_vst2: -  case Intrinsic::arm_neon_vst3: -  case Intrinsic::arm_neon_vst4: { -    Info.opc = ISD::INTRINSIC_VOID; -    // Conservatively set memVT to the entire set of vectors stored. -    unsigned NumElts = 0; -    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { -      Type *ArgTy = I.getArgOperand(ArgI)->getType(); -      if (!ArgTy->isVectorTy()) -        break; -      NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; -    } -    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); -    Info.ptrVal = I.getArgOperand(0); -    Info.offset = 0; -    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); -    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); -    Info.vol = false; // volatile stores with NEON intrinsics not supported -    Info.readMem = false; -    Info.writeMem = true; -    return true; -  } -  default: -    break; -  } - -  return false; -} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index da7f623..3e309a9 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -281,10 +281,6 @@ public:    std::pair<unsigned, const TargetRegisterClass*>    getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; - -  virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, -                                  unsigned Intrinsic) const LLVM_OVERRIDE; -  private:    const InstrItineraryData *Itins; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index ab4d083..5781578 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -1194,23 +1194,5 @@ class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode, dag outs, dag i    // Inherit Rd in 4-0  } -// Format AdvSIMD vector load/store multiple N-element structure -class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size, -                    dag outs, dag ins, string asmstr, -                    list<dag> patterns, InstrItinClass itin> -  : A64InstRtn<outs, ins, asmstr, patterns, itin> -{ -  let Inst{31} = 0b0; -  let Inst{30} = q; -  let Inst{29-23} = 0b0011000; -  let Inst{22} = l; -  let Inst{21-16} = 0b000000; -  let Inst{15-12} = opcode; -  let Inst{11-10} = size; -   -  // Inherit Rn in 9-5 -  // Inherit Rt in 4-0 -} -  } diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 355de53..a9f6061 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -2982,132 +2982,6 @@ defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2",  // End of implementation for instruction class (3V Diff) -// The followings are vector load/store multiple N-element structure -// (class SIMD lselem). - -// ld1:         load multiple 1-element structure to 1/2/3/4 registers. -// ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4). -//              The structure consists of a sequence of sets of N values. -//              The first element of the structure is placed in the first lane -//              of the first first vector, the second element in the first lane -//              of the second vector, and so on.  -// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into -// the three 64-bit vectors list {BA, DC, FE}. -// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three -// 64-bit vectors list {DA, EB, FC}. -// Store instructions store multiple structure to N registers like load. - - -class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size, -                    RegisterOperand VecList, string asmop> -  : NeonI_LdStMult<q, 1, opcode, size, -                 (outs VecList:$Rt), (ins GPR64xsp:$Rn), -                 asmop # "\t$Rt, [$Rn]", -                 [], -                 NoItinerary> { -  let mayLoad = 1; -  let neverHasSideEffects = 1; -} - -multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> { -  def _8B : NeonI_LDVList<0, opcode, 0b00, -                          !cast<RegisterOperand>(List # "8B_operand"), asmop>; - -  def _4H : NeonI_LDVList<0, opcode, 0b01, -                          !cast<RegisterOperand>(List # "4H_operand"), asmop>; - -  def _2S : NeonI_LDVList<0, opcode, 0b10, -                          !cast<RegisterOperand>(List # "2S_operand"), asmop>; - -  def _16B : NeonI_LDVList<1, opcode, 0b00, -                           !cast<RegisterOperand>(List # "16B_operand"), asmop>; - -  def _8H : NeonI_LDVList<1, opcode, 0b01, -                          !cast<RegisterOperand>(List # "8H_operand"), asmop>; - -  def _4S : NeonI_LDVList<1, opcode, 0b10, -                          !cast<RegisterOperand>(List # "4S_operand"), asmop>; - -  def _2D : NeonI_LDVList<1, opcode, 0b11, -                          !cast<RegisterOperand>(List # "2D_operand"), asmop>; -} - -// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4) -defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">; -def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">; - -defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">; - -defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">; - -defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">; - -// Load multiple 1-element structure to N consecutive registers (N = 2,3,4) -defm LD1_2V : LDVList_BHSD<0b1010, "VPair", "ld1">; -def LD1_2V_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">; - -defm LD1_3V : LDVList_BHSD<0b0110, "VTriple", "ld1">; -def LD1_3V_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">; - -defm LD1_4V : LDVList_BHSD<0b0010, "VQuad", "ld1">; -def LD1_4V_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">; - -class NeonI_STVList<bit q, bits<4> opcode, bits<2> size, -                    RegisterOperand VecList, string asmop> -  : NeonI_LdStMult<q, 0, opcode, size, -                 (outs), (ins GPR64xsp:$Rn, VecList:$Rt),  -                 asmop # "\t$Rt, [$Rn]", -                 [],  -                 NoItinerary> { -  let mayStore = 1; -  let neverHasSideEffects = 1; -} - -multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> { -  def _8B : NeonI_STVList<0, opcode, 0b00, -                          !cast<RegisterOperand>(List # "8B_operand"), asmop>; - -  def _4H : NeonI_STVList<0, opcode, 0b01, -                          !cast<RegisterOperand>(List # "4H_operand"), asmop>; - -  def _2S : NeonI_STVList<0, opcode, 0b10, -                          !cast<RegisterOperand>(List # "2S_operand"), asmop>; - -  def _16B : NeonI_STVList<1, opcode, 0b00, -                           !cast<RegisterOperand>(List # "16B_operand"), asmop>; - -  def _8H : NeonI_STVList<1, opcode, 0b01, -                          !cast<RegisterOperand>(List # "8H_operand"), asmop>; - -  def _4S : NeonI_STVList<1, opcode, 0b10, -                          !cast<RegisterOperand>(List # "4S_operand"), asmop>; - -  def _2D : NeonI_STVList<1, opcode, 0b11, -                          !cast<RegisterOperand>(List # "2D_operand"), asmop>; -} - -// Store multiple N-element structures from N registers (N = 1,2,3,4) -defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">; -def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">; - -defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">; - -defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">; - -defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">; - -// Store multiple 1-element structures from N consecutive registers (N = 2,3,4) -defm ST1_2V : STVList_BHSD<0b1010, "VPair", "st1">; -def ST1_2V_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">; - -defm ST1_3V : STVList_BHSD<0b0110, "VTriple", "st1">; -def ST1_3V_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">; - -defm ST1_4V : STVList_BHSD<0b0010, "VQuad", "st1">; -def ST1_4V_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">; - -// End of vector load/store multiple N-element structure(class SIMD lselem) -  // Scalar Arithmetic  class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop> diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index 5e2b196..b7a6acb 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -17,20 +17,6 @@ def sub_64 : SubRegIndex<64>;  def sub_32 : SubRegIndex<32>;  def sub_16 : SubRegIndex<16>;  def sub_8  : SubRegIndex<8>; - -// Note: Code depends on these having consecutive numbers. -def qqsub : SubRegIndex<256, 256>; - -def qsub_0 : SubRegIndex<128>; -def qsub_1 : SubRegIndex<128, 128>; -def qsub_2 : ComposedSubRegIndex<qqsub, qsub_0>; -def qsub_3 : ComposedSubRegIndex<qqsub, qsub_1>; - -def dsub_0 : SubRegIndex<64>; -def dsub_1 : SubRegIndex<64, 64>; -def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>; -def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>; -def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;  }  // Registers are identified with 5-bit ID numbers. @@ -202,90 +188,3 @@ def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {    let CopyCost = -1;    let isAllocatable = 0;  } - -//===----------------------------------------------------------------------===// -//  Consecutive vector registers -//===----------------------------------------------------------------------===// -// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D30_D31 -def Tuples2D : RegisterTuples<[dsub_0, dsub_1], -                              [(rotl FPR64, 0), (rotl FPR64, 1)]>; -                               -// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1 -def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], -                              [(rotl FPR64, 0), (rotl FPR64, 1), -                               (rotl FPR64, 2)]>; -                                -// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2 -def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3], -                              [(rotl FPR64, 0), (rotl FPR64, 1), -                               (rotl FPR64, 2), (rotl FPR64, 3)]>; - -// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31 -def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], -                              [(rotl FPR128, 0), (rotl FPR128, 1)]>; - -// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1 -def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2], -                              [(rotl FPR128, 0), (rotl FPR128, 1), -                               (rotl FPR128, 2)]>; -                                -// 4 Consecutive 128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2 -def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3], -                              [(rotl FPR128, 0), (rotl FPR128, 1), -                               (rotl FPR128, 2), (rotl FPR128, 3)]>; - -// The followings are super register classes to model 2/3/4 consecutive -// 64-bit/128-bit registers. - -def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>; - -def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> { -  let Size = 192; // 3 x 64 bits, we have no predefined type of that size. -} - -def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>; - -def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>; - -def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> { -  let Size = 384; // 3 x 128 bits, we have no predefined type of that size. -} - -def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>; - - -// The followings are vector list operands -multiclass VectorList_operands<string PREFIX, string LAYOUT, int Count, -                               RegisterClass RegList> { -  def _asmoperand : AsmOperandClass { -    let Name = PREFIX # LAYOUT # Count; -    let RenderMethod = "addVectorListOperands"; -    let PredicateMethod =  -        "isVectorList<A64Layout::_" # LAYOUT # ", " # Count # ">"; -    let ParserMethod = "ParseVectorList"; -  } - -  def _operand : RegisterOperand<RegList, -        "printVectorList<A64Layout::_" # LAYOUT # ", " # Count # ">"> { -    let ParserMatchClass = -      !cast<AsmOperandClass>(PREFIX # LAYOUT # "_asmoperand"); -  } -} - -multiclass VectorList_BHSD<string PREFIX, int Count, RegisterClass DRegList, -                           RegisterClass QRegList> { -  defm 8B : VectorList_operands<PREFIX, "8B", Count, DRegList>; -  defm 4H : VectorList_operands<PREFIX, "4H", Count, DRegList>; -  defm 2S : VectorList_operands<PREFIX, "2S", Count, DRegList>; -  defm 1D : VectorList_operands<PREFIX, "1D", Count, DRegList>; -  defm 16B : VectorList_operands<PREFIX, "16B", Count, QRegList>; -  defm 8H : VectorList_operands<PREFIX, "8H", Count, QRegList>; -  defm 4S : VectorList_operands<PREFIX, "4S", Count, QRegList>; -  defm 2D : VectorList_operands<PREFIX, "2D", Count, QRegList>; -} - -// Vector list operand with 1/2/3/4 registers: VOne8B_operand,..., VQuad2D_operand -defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>; -defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>; -defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>; -defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>;
\ No newline at end of file diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 127c7ec..51638d9 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -127,11 +127,6 @@ public:    OperandMatchResultTy    ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); -  bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout, -                      SMLoc &LayoutLoc); - -  OperandMatchResultTy ParseVectorList(SmallVectorImpl<MCParsedAsmOperand *> &); -    bool validateInstruction(MCInst &Inst,                            const SmallVectorImpl<MCParsedAsmOperand*> &Operands); @@ -159,7 +154,6 @@ private:      k_Immediate,      // Including expressions referencing symbols      k_Register,      k_ShiftExtend, -    k_VectorList,     // A sequential list of 1 to 4 registers.      k_SysReg,         // The register operand of MRS and MSR instructions      k_Token,          // The mnemonic; other raw tokens the auto-generated      k_WrappedRegister // Load/store exclusive permit a wrapped register. @@ -195,13 +189,6 @@ private:      bool ImplicitAmount;    }; -  // A vector register list is a sequential list of 1 to 4 registers. -  struct VectorListOp { -    unsigned RegNum; -    unsigned Count; -    A64Layout::VectorLayout Layout; -  }; -    struct SysRegOp {      const char *Data;      unsigned Length; @@ -219,7 +206,6 @@ private:      struct ImmOp Imm;      struct RegOp Reg;      struct ShiftExtendOp ShiftExtend; -    struct VectorListOp VectorList;      struct SysRegOp SysReg;      struct TokOp Tok;    }; @@ -731,12 +717,6 @@ public:      return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16;    } -  template <A64Layout::VectorLayout Layout, unsigned Count> -  bool isVectorList() const { -    return Kind == k_VectorList && VectorList.Layout == Layout && -           VectorList.Count == Count; -  } -    template <int MemSize> bool isSImm7Scaled() const {      if (!isImm())        return false; @@ -857,18 +837,6 @@ public:      return Op;    } -  static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, -                                          A64Layout::VectorLayout Layout, -                                          SMLoc S, SMLoc E) { -    AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E); -    Op->VectorList.RegNum = RegNum; -    Op->VectorList.Count = Count; -    Op->VectorList.Layout = Layout; -    Op->StartLoc = S; -    Op->EndLoc = E; -    return Op; -  } -    static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {      AArch64Operand *Op = new AArch64Operand(k_Token, S, S);      Op->Tok.Data = Str.data(); @@ -1216,11 +1184,6 @@ public:      }      Inst.addOperand(MCOperand::CreateImm(Imm));    } - -  void addVectorListOperands(MCInst &Inst, unsigned N) const { -    assert(N == 1 && "Invalid number of operands!"); -    Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); -  }  };  } // end anonymous namespace. @@ -1260,6 +1223,7 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,        else          return MatchOperand_Success;      } +      // ... or it might be a symbolish thing    }      // Fall through @@ -1303,7 +1267,7 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,      return ParseOperand(Operands, Mnemonic);    }    // The following will likely be useful later, but not in very early cases -  case AsmToken::LCurly: // SIMD vector list is not parsed here +  case AsmToken::LCurly:  // Weird SIMD lists      llvm_unreachable("Don't know how to deal with '{' in operand");      return MatchOperand_ParseFail;    } @@ -1926,132 +1890,6 @@ AArch64AsmParser::ParseShiftExtend(    return MatchOperand_Success;  } -/// Try to parse a vector register token, If it is a vector register, -/// the token is eaten and return true. Otherwise return false. -bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, -                                      StringRef &Layout, SMLoc &LayoutLoc) { -  bool IsVector = true; - -  if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) -    IsVector = false; - -  if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains(RegNum) && -      !AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(RegNum)) -    IsVector = false; - -  if (Layout.size() == 0) -    IsVector = false; - -  if (!IsVector) -    Error(Parser.getTok().getLoc(), "expected vector type register"); - -  Parser.Lex(); // Eat this token. -  return IsVector; -} - - -// A vector list contains 1-4 consecutive registers. -// Now there are two kinds of vector list when number of vector > 1: -//   (1) {Vn.layout, Vn+1.layout, ... , Vm.layout} -//   (2) {Vn.layout - Vm.layout} -AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList( -    SmallVectorImpl<MCParsedAsmOperand *> &Operands) { -  if (Parser.getTok().isNot(AsmToken::LCurly)) { -    Error(Parser.getTok().getLoc(), "'{' expected"); -    return MatchOperand_ParseFail; -  } -  SMLoc SLoc = Parser.getTok().getLoc(); -  Parser.Lex(); // Eat '{' token. - -  unsigned Reg, Count = 1; -  StringRef LayoutStr; -  SMLoc RegEndLoc, LayoutLoc; -  if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc)) -    return MatchOperand_ParseFail; - -  if (Parser.getTok().is(AsmToken::Minus)) { -    Parser.Lex(); // Eat the minus. - -    unsigned Reg2; -    StringRef LayoutStr2; -    SMLoc RegEndLoc2, LayoutLoc2; -    SMLoc RegLoc2 = Parser.getTok().getLoc(); - -    if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2)) -      return MatchOperand_ParseFail; -    unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg); - -    if (LayoutStr != LayoutStr2) { -      Error(LayoutLoc2, "expected the same vector layout"); -      return MatchOperand_ParseFail; -    } -    if (Space == 0 || Space > 3) { -      Error(RegLoc2, "invalid number of vectors"); -      return MatchOperand_ParseFail; -    } - -    Count += Space; -  } else { -    unsigned LastReg = Reg; -    while (Parser.getTok().is(AsmToken::Comma)) { -      Parser.Lex(); // Eat the comma. -      unsigned Reg2; -      StringRef LayoutStr2; -      SMLoc RegEndLoc2, LayoutLoc2; -      SMLoc RegLoc2 = Parser.getTok().getLoc(); - -      if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2)) -        return MatchOperand_ParseFail; -      unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg) -                                        : (Reg2 + 32 - LastReg); -      Count++; - -      // The space between two vectors should be 1. And they should have the same layout. -      // Total count shouldn't be great than 4 -      if (Space != 1) { -        Error(RegLoc2, "invalid space between two vectors"); -        return MatchOperand_ParseFail; -      } -      if (LayoutStr != LayoutStr2) { -        Error(LayoutLoc2, "expected the same vector layout"); -        return MatchOperand_ParseFail; -      } -      if (Count > 4) { -        Error(RegLoc2, "invalid number of vectors"); -        return MatchOperand_ParseFail; -      } - -      LastReg = Reg2; -    } -  } - -  if (Parser.getTok().isNot(AsmToken::RCurly)) { -    Error(Parser.getTok().getLoc(), "'}' expected"); -    return MatchOperand_ParseFail; -  } -  SMLoc ELoc = Parser.getTok().getLoc(); -  Parser.Lex(); // Eat '}' token. - -  A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr); -  if (Count > 1) { // If count > 1, create vector list using super register. -    bool IsVec64 = (Layout < A64Layout::_16B) ? true : false; -    static unsigned SupRegIDs[3][2] = { -      { AArch64::QPairRegClassID, AArch64::DPairRegClassID }, -      { AArch64::QTripleRegClassID, AArch64::DTripleRegClassID }, -      { AArch64::QQuadRegClassID, AArch64::DQuadRegClassID } -    }; -    unsigned SupRegID = SupRegIDs[Count - 2][static_cast<int>(IsVec64)]; -    unsigned Sub0 = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0; -    const MCRegisterInfo *MRI = getContext().getRegisterInfo(); -    Reg = MRI->getMatchingSuperReg(Reg, Sub0, -                                   &AArch64MCRegisterClasses[SupRegID]); -  } -  Operands.push_back( -      AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc)); - -  return MatchOperand_Success; -} -  // FIXME: We would really like to be able to tablegen'erate this.  bool AArch64AsmParser::  validateInstruction(MCInst &Inst, diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 16ec0cb..b9d7c16 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -361,59 +361,6 @@ DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,    return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder);  } -static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo, -                                            unsigned RegID, -                                            const void *Decoder) { -  if (RegNo > 31) -    return MCDisassembler::Fail; - -  uint16_t Register = getReg(Decoder, RegID, RegNo); -  Inst.addOperand(MCOperand::CreateReg(Register)); -  return MCDisassembler::Success; -} - -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, -                                             uint64_t Address, -                                             const void *Decoder) { -  return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID, -                                 Decoder); -} - -static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, -                                             uint64_t Address, -                                             const void *Decoder) { -  return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID, -                                 Decoder); -} - -static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, -                                               unsigned RegNo, uint64_t Address, -                                               const void *Decoder) { -  return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID, -                                 Decoder); -} - -static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, -                                               unsigned RegNo, uint64_t Address, -                                               const void *Decoder) { -  return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID, -                                 Decoder); -} - -static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, -                                             uint64_t Address, -                                             const void *Decoder) { -  return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID, -                                 Decoder); -} - -static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, -                                             uint64_t Address, -                                             const void *Decoder) { -  return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID, -                                 Decoder); -} -  static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,                                                 unsigned OptionHiS,                                                 uint64_t Address, diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 51335e1..26bd797 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -507,33 +507,3 @@ void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,    O << "#0x";    O.write_hex(Mask);  } - -// If Count > 1, there are two valid kinds of vector list: -//   (1) {Vn.layout, Vn+1.layout, ... , Vm.layout} -//   (2) {Vn.layout - Vm.layout} -// We choose the first kind as output. -template <A64Layout::VectorLayout Layout, unsigned Count> -void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, -                                         raw_ostream &O) { -  assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors"); - -  unsigned Reg = MI->getOperand(OpNum).getReg(); -  std::string LayoutStr = A64VectorLayoutToString(Layout); -  O << "{"; -  if (Count > 1) { // Print sub registers separately -    bool IsVec64 = (Layout < A64Layout::_16B) ? true : false; -    unsigned SubRegIdx = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0; -    for (unsigned I = 0; I < Count; I++) { -      std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++)); -      Name[0] = 'v'; -      O << Name << LayoutStr; -      if (I != Count - 1) -        O << ", "; -    } -  } else { // Print the register directly when NumVecs is 1. -    std::string Name = getRegisterName(Reg); -    Name[0] = 'v'; -    O << Name << LayoutStr; -  } -  O << "}"; -} diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 28ebfc4..71c9f4a 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -174,9 +174,6 @@ public:                                   raw_ostream &O);    void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,                                    raw_ostream &O); - -  template <A64Layout::VectorLayout Layout, unsigned Count> -  void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);  };  } diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 7db5238..e675efc 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -306,50 +306,6 @@ namespace A64SE {      };  } -namespace A64Layout { -    enum VectorLayout { -        Invalid = -1, -        _8B, -        _4H, -        _2S, -        _1D, - -        _16B, -        _8H, -        _4S, -        _2D -    }; -} - -inline static const char * -A64VectorLayoutToString(A64Layout::VectorLayout Layout) { -  switch (Layout) { -  case A64Layout::_8B:  return ".8b"; -  case A64Layout::_4H:  return ".4h"; -  case A64Layout::_2S:  return ".2s"; -  case A64Layout::_1D:  return ".1d"; -  case A64Layout::_16B:  return ".16b"; -  case A64Layout::_8H:  return ".8h"; -  case A64Layout::_4S:  return ".4s"; -  case A64Layout::_2D:  return ".2d"; -  default: llvm_unreachable("Unknown Vector Layout"); -  } -} - -inline static A64Layout::VectorLayout -A64StringToVectorLayout(StringRef LayoutStr) { -  return StringSwitch<A64Layout::VectorLayout>(LayoutStr) -             .Case(".8b", A64Layout::_8B) -             .Case(".4h", A64Layout::_4H) -             .Case(".2s", A64Layout::_2S) -             .Case(".1d", A64Layout::_1D) -             .Case(".16b", A64Layout::_16B) -             .Case(".8h", A64Layout::_8H) -             .Case(".4s", A64Layout::_4S) -             .Case(".2d", A64Layout::_2D) -             .Default(A64Layout::Invalid); -} -  namespace A64SysReg {    enum SysRegROValues {      MDCCSR_EL0        = 0x9808, // 10  011  0000  0001  000 diff --git a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll b/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll deleted file mode 100644 index 4cd76bc..0000000 --- a/test/CodeGen/AArch64/neon-simd-ldst-multi-elem.ll +++ /dev/null @@ -1,1228 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s - -%struct.int8x16x2_t = type { [2 x <16 x i8>] } -%struct.int16x8x2_t = type { [2 x <8 x i16>] } -%struct.int32x4x2_t = type { [2 x <4 x i32>] } -%struct.int64x2x2_t = type { [2 x <2 x i64>] } -%struct.float32x4x2_t = type { [2 x <4 x float>] } -%struct.float64x2x2_t = type { [2 x <2 x double>] } -%struct.int8x8x2_t = type { [2 x <8 x i8>] } -%struct.int16x4x2_t = type { [2 x <4 x i16>] } -%struct.int32x2x2_t = type { [2 x <2 x i32>] } -%struct.int64x1x2_t = type { [2 x <1 x i64>] } -%struct.float32x2x2_t = type { [2 x <2 x float>] } -%struct.float64x1x2_t = type { [2 x <1 x double>] } -%struct.int8x16x3_t = type { [3 x <16 x i8>] } -%struct.int16x8x3_t = type { [3 x <8 x i16>] } -%struct.int32x4x3_t = type { [3 x <4 x i32>] } -%struct.int64x2x3_t = type { [3 x <2 x i64>] } -%struct.float32x4x3_t = type { [3 x <4 x float>] } -%struct.float64x2x3_t = type { [3 x <2 x double>] } -%struct.int8x8x3_t = type { [3 x <8 x i8>] } -%struct.int16x4x3_t = type { [3 x <4 x i16>] } -%struct.int32x2x3_t = type { [3 x <2 x i32>] } -%struct.int64x1x3_t = type { [3 x <1 x i64>] } -%struct.float32x2x3_t = type { [3 x <2 x float>] } -%struct.float64x1x3_t = type { [3 x <1 x double>] } -%struct.int8x16x4_t = type { [4 x <16 x i8>] } -%struct.int16x8x4_t = type { [4 x <8 x i16>] } -%struct.int32x4x4_t = type { [4 x <4 x i32>] } -%struct.int64x2x4_t = type { [4 x <2 x i64>] } -%struct.float32x4x4_t = type { [4 x <4 x float>] } -%struct.float64x2x4_t = type { [4 x <2 x double>] } -%struct.int8x8x4_t = type { [4 x <8 x i8>] } -%struct.int16x4x4_t = type { [4 x <4 x i16>] } -%struct.int32x2x4_t = type { [4 x <2 x i32>] } -%struct.int64x1x4_t = type { [4 x <1 x i64>] } -%struct.float32x2x4_t = type { [4 x <2 x float>] } -%struct.float64x1x4_t = type { [4 x <1 x double>] } - - -define <16 x i8> @test_vld1q_s8(i8* readonly %a) { -; CHECK: test_vld1q_s8 -; CHECK: ld1 {v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] -  %vld1 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %a, i32 1) -  ret <16 x i8> %vld1 -} - -define <8 x i16> @test_vld1q_s16(i16* readonly %a) { -; CHECK: test_vld1q_s16 -; CHECK: ld1 {v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld1 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %1, i32 2) -  ret <8 x i16> %vld1 -} - -define <4 x i32> @test_vld1q_s32(i32* readonly %a) { -; CHECK: test_vld1q_s32 -; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  %vld1 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %1, i32 4) -  ret <4 x i32> %vld1 -} - -define <2 x i64> @test_vld1q_s64(i64* readonly %a) { -; CHECK: test_vld1q_s64 -; CHECK: ld1 {v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  %vld1 = tail call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %1, i32 8) -  ret <2 x i64> %vld1 -} - -define <4 x float> @test_vld1q_f32(float* readonly %a) { -; CHECK: test_vld1q_f32 -; CHECK: ld1 {v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %1, i32 4) -  ret <4 x float> %vld1 -} - -define <2 x double> @test_vld1q_f64(double* readonly %a) { -; CHECK: test_vld1q_f64 -; CHECK: ld1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  %vld1 = tail call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %1, i32 8) -  ret <2 x double> %vld1 -} - -define <8 x i8> @test_vld1_s8(i8* readonly %a) { -; CHECK: test_vld1_s8 -; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] -  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1) -  ret <8 x i8> %vld1 -} - -define <4 x i16> @test_vld1_s16(i16* readonly %a) { -; CHECK: test_vld1_s16 -; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2) -  ret <4 x i16> %vld1 -} - -define <2 x i32> @test_vld1_s32(i32* readonly %a) { -; CHECK: test_vld1_s32 -; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  %vld1 = tail call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %1, i32 4) -  ret <2 x i32> %vld1 -} - -define <1 x i64> @test_vld1_s64(i64* readonly %a) { -; CHECK: test_vld1_s64 -; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  %vld1 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %1, i32 8) -  ret <1 x i64> %vld1 -} - -define <2 x float> @test_vld1_f32(float* readonly %a) { -; CHECK: test_vld1_f32 -; CHECK: ld1 {v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  %vld1 = tail call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %1, i32 4) -  ret <2 x float> %vld1 -} - -define <1 x double> @test_vld1_f64(double* readonly %a) { -; CHECK: test_vld1_f64 -; CHECK: ld1 {v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  %vld1 = tail call <1 x double> @llvm.arm.neon.vld1.v1f64(i8* %1, i32 8) -  ret <1 x double> %vld1 -} - -define <8 x i8> @test_vld1_p8(i8* readonly %a) { -; CHECK: test_vld1_p8 -; CHECK: ld1 {v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] -  %vld1 = tail call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %a, i32 1) -  ret <8 x i8> %vld1 -} - -define <4 x i16> @test_vld1_p16(i16* readonly %a) { -; CHECK: test_vld1_p16 -; CHECK: ld1 {v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld1 = tail call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %1, i32 2) -  ret <4 x i16> %vld1 -} - -define %struct.int8x16x2_t @test_vld2q_s8(i8* readonly %a) { -; CHECK: test_vld2q_s8 -; CHECK: ld2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] -  %vld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8* %a, i32 1) -  %vld2.fca.0.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <16 x i8>, <16 x i8> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.int8x16x2_t undef, <16 x i8> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int8x16x2_t %.fca.0.0.insert, <16 x i8> %vld2.fca.1.extract, 0, 1 -  ret %struct.int8x16x2_t %.fca.0.1.insert -} - -define %struct.int16x8x2_t @test_vld2q_s16(i16* readonly %a) { -; CHECK: test_vld2q_s16 -; CHECK: ld2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8* %1, i32 2) -  %vld2.fca.0.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <8 x i16>, <8 x i16> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.int16x8x2_t undef, <8 x i16> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int16x8x2_t %.fca.0.0.insert, <8 x i16> %vld2.fca.1.extract, 0, 1 -  ret %struct.int16x8x2_t %.fca.0.1.insert -} - -define %struct.int32x4x2_t @test_vld2q_s32(i32* readonly %a) { -; CHECK: test_vld2q_s32 -; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  %vld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8* %1, i32 4) -  %vld2.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <4 x i32>, <4 x i32> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.int32x4x2_t undef, <4 x i32> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int32x4x2_t %.fca.0.0.insert, <4 x i32> %vld2.fca.1.extract, 0, 1 -  ret %struct.int32x4x2_t %.fca.0.1.insert -} - -define %struct.int64x2x2_t @test_vld2q_s64(i64* readonly %a) { -; CHECK: test_vld2q_s64 -; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  %vld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8* %1, i32 8) -  %vld2.fca.0.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <2 x i64>, <2 x i64> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.int64x2x2_t undef, <2 x i64> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int64x2x2_t %.fca.0.0.insert, <2 x i64> %vld2.fca.1.extract, 0, 1 -  ret %struct.int64x2x2_t %.fca.0.1.insert -} - -define %struct.float32x4x2_t @test_vld2q_f32(float* readonly %a) { -; CHECK: test_vld2q_f32 -; CHECK: ld2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  %vld2 = tail call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8* %1, i32 4) -  %vld2.fca.0.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <4 x float>, <4 x float> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.float32x4x2_t undef, <4 x float> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float32x4x2_t %.fca.0.0.insert, <4 x float> %vld2.fca.1.extract, 0, 1 -  ret %struct.float32x4x2_t %.fca.0.1.insert -} - -define %struct.float64x2x2_t @test_vld2q_f64(double* readonly %a) { -; CHECK: test_vld2q_f64 -; CHECK: ld2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  %vld2 = tail call { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8* %1, i32 8) -  %vld2.fca.0.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <2 x double>, <2 x double> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.float64x2x2_t undef, <2 x double> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float64x2x2_t %.fca.0.0.insert, <2 x double> %vld2.fca.1.extract, 0, 1 -  ret %struct.float64x2x2_t %.fca.0.1.insert -} - -define %struct.int8x8x2_t @test_vld2_s8(i8* readonly %a) { -; CHECK: test_vld2_s8 -; CHECK: ld2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] -  %vld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8* %a, i32 1) -  %vld2.fca.0.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <8 x i8>, <8 x i8> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.int8x8x2_t undef, <8 x i8> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int8x8x2_t %.fca.0.0.insert, <8 x i8> %vld2.fca.1.extract, 0, 1 -  ret %struct.int8x8x2_t %.fca.0.1.insert -} - -define %struct.int16x4x2_t @test_vld2_s16(i16* readonly %a) { -; CHECK: test_vld2_s16 -; CHECK: ld2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8* %1, i32 2) -  %vld2.fca.0.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <4 x i16>, <4 x i16> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.int16x4x2_t undef, <4 x i16> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int16x4x2_t %.fca.0.0.insert, <4 x i16> %vld2.fca.1.extract, 0, 1 -  ret %struct.int16x4x2_t %.fca.0.1.insert -} - -define %struct.int32x2x2_t @test_vld2_s32(i32* readonly %a) { -; CHECK: test_vld2_s32 -; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  %vld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8* %1, i32 4) -  %vld2.fca.0.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <2 x i32>, <2 x i32> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.int32x2x2_t undef, <2 x i32> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int32x2x2_t %.fca.0.0.insert, <2 x i32> %vld2.fca.1.extract, 0, 1 -  ret %struct.int32x2x2_t %.fca.0.1.insert -} - -define %struct.int64x1x2_t @test_vld2_s64(i64* readonly %a) { -; CHECK: test_vld2_s64 -; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  %vld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8* %1, i32 8) -  %vld2.fca.0.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <1 x i64>, <1 x i64> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.int64x1x2_t undef, <1 x i64> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int64x1x2_t %.fca.0.0.insert, <1 x i64> %vld2.fca.1.extract, 0, 1 -  ret %struct.int64x1x2_t %.fca.0.1.insert -} - -define %struct.float32x2x2_t @test_vld2_f32(float* readonly %a) { -; CHECK: test_vld2_f32 -; CHECK: ld2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  %vld2 = tail call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8* %1, i32 4) -  %vld2.fca.0.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <2 x float>, <2 x float> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.float32x2x2_t undef, <2 x float> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float32x2x2_t %.fca.0.0.insert, <2 x float> %vld2.fca.1.extract, 0, 1 -  ret %struct.float32x2x2_t %.fca.0.1.insert -} - -define %struct.float64x1x2_t @test_vld2_f64(double* readonly %a) { -; CHECK: test_vld2_f64 -; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  %vld2 = tail call { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8* %1, i32 8) -  %vld2.fca.0.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 0 -  %vld2.fca.1.extract = extractvalue { <1 x double>, <1 x double> } %vld2, 1 -  %.fca.0.0.insert = insertvalue %struct.float64x1x2_t undef, <1 x double> %vld2.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float64x1x2_t %.fca.0.0.insert, <1 x double> %vld2.fca.1.extract, 0, 1 -  ret %struct.float64x1x2_t %.fca.0.1.insert -} - -define %struct.int8x16x3_t @test_vld3q_s8(i8* readonly %a) { -; CHECK: test_vld3q_s8 -; CHECK: ld3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] -  %vld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8* %a, i32 1) -  %vld3.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.int8x16x3_t undef, <16 x i8> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int8x16x3_t %.fca.0.0.insert, <16 x i8> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int8x16x3_t %.fca.0.1.insert, <16 x i8> %vld3.fca.2.extract, 0, 2 -  ret %struct.int8x16x3_t %.fca.0.2.insert -} - -define %struct.int16x8x3_t @test_vld3q_s16(i16* readonly %a) { -; CHECK: test_vld3q_s16 -; CHECK: ld3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8* %1, i32 2) -  %vld3.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.int16x8x3_t undef, <8 x i16> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int16x8x3_t %.fca.0.0.insert, <8 x i16> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int16x8x3_t %.fca.0.1.insert, <8 x i16> %vld3.fca.2.extract, 0, 2 -  ret %struct.int16x8x3_t %.fca.0.2.insert -} - -define %struct.int32x4x3_t @test_vld3q_s32(i32* readonly %a) { -; CHECK: test_vld3q_s32 -; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  %vld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8* %1, i32 4) -  %vld3.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.int32x4x3_t undef, <4 x i32> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int32x4x3_t %.fca.0.0.insert, <4 x i32> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int32x4x3_t %.fca.0.1.insert, <4 x i32> %vld3.fca.2.extract, 0, 2 -  ret %struct.int32x4x3_t %.fca.0.2.insert -} - -define %struct.int64x2x3_t @test_vld3q_s64(i64* readonly %a) { -; CHECK: test_vld3q_s64 -; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  %vld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8* %1, i32 8) -  %vld3.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.int64x2x3_t undef, <2 x i64> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int64x2x3_t %.fca.0.0.insert, <2 x i64> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int64x2x3_t %.fca.0.1.insert, <2 x i64> %vld3.fca.2.extract, 0, 2 -  ret %struct.int64x2x3_t %.fca.0.2.insert -} - -define %struct.float32x4x3_t @test_vld3q_f32(float* readonly %a) { -; CHECK: test_vld3q_f32 -; CHECK: ld3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  %vld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8* %1, i32 4) -  %vld3.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.float32x4x3_t undef, <4 x float> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float32x4x3_t %.fca.0.0.insert, <4 x float> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.float32x4x3_t %.fca.0.1.insert, <4 x float> %vld3.fca.2.extract, 0, 2 -  ret %struct.float32x4x3_t %.fca.0.2.insert -} - -define %struct.float64x2x3_t @test_vld3q_f64(double* readonly %a) { -; CHECK: test_vld3q_f64 -; CHECK: ld3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  %vld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8* %1, i32 8) -  %vld3.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.float64x2x3_t undef, <2 x double> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float64x2x3_t %.fca.0.0.insert, <2 x double> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.float64x2x3_t %.fca.0.1.insert, <2 x double> %vld3.fca.2.extract, 0, 2 -  ret %struct.float64x2x3_t %.fca.0.2.insert -} - -define %struct.int8x8x3_t @test_vld3_s8(i8* readonly %a) { -; CHECK: test_vld3_s8 -; CHECK: ld3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] -  %vld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8* %a, i32 1) -  %vld3.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.int8x8x3_t undef, <8 x i8> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int8x8x3_t %.fca.0.0.insert, <8 x i8> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int8x8x3_t %.fca.0.1.insert, <8 x i8> %vld3.fca.2.extract, 0, 2 -  ret %struct.int8x8x3_t %.fca.0.2.insert -} - -define %struct.int16x4x3_t @test_vld3_s16(i16* readonly %a) { -; CHECK: test_vld3_s16 -; CHECK: ld3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8* %1, i32 2) -  %vld3.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.int16x4x3_t undef, <4 x i16> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int16x4x3_t %.fca.0.0.insert, <4 x i16> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int16x4x3_t %.fca.0.1.insert, <4 x i16> %vld3.fca.2.extract, 0, 2 -  ret %struct.int16x4x3_t %.fca.0.2.insert -} - -define %struct.int32x2x3_t @test_vld3_s32(i32* readonly %a) { -; CHECK: test_vld3_s32 -; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  %vld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8* %1, i32 4) -  %vld3.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.int32x2x3_t undef, <2 x i32> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int32x2x3_t %.fca.0.0.insert, <2 x i32> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int32x2x3_t %.fca.0.1.insert, <2 x i32> %vld3.fca.2.extract, 0, 2 -  ret %struct.int32x2x3_t %.fca.0.2.insert -} - -define %struct.int64x1x3_t @test_vld3_s64(i64* readonly %a) { -; CHECK: test_vld3_s64 -; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  %vld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8* %1, i32 8) -  %vld3.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.int64x1x3_t undef, <1 x i64> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int64x1x3_t %.fca.0.0.insert, <1 x i64> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int64x1x3_t %.fca.0.1.insert, <1 x i64> %vld3.fca.2.extract, 0, 2 -  ret %struct.int64x1x3_t %.fca.0.2.insert -} - -define %struct.float32x2x3_t @test_vld3_f32(float* readonly %a) { -; CHECK: test_vld3_f32 -; CHECK: ld3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  %vld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8* %1, i32 4) -  %vld3.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.float32x2x3_t undef, <2 x float> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float32x2x3_t %.fca.0.0.insert, <2 x float> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.float32x2x3_t %.fca.0.1.insert, <2 x float> %vld3.fca.2.extract, 0, 2 -  ret %struct.float32x2x3_t %.fca.0.2.insert -} - -define %struct.float64x1x3_t @test_vld3_f64(double* readonly %a) { -; CHECK: test_vld3_f64 -; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  %vld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8* %1, i32 8) -  %vld3.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 0 -  %vld3.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 1 -  %vld3.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double> } %vld3, 2 -  %.fca.0.0.insert = insertvalue %struct.float64x1x3_t undef, <1 x double> %vld3.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float64x1x3_t %.fca.0.0.insert, <1 x double> %vld3.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.float64x1x3_t %.fca.0.1.insert, <1 x double> %vld3.fca.2.extract, 0, 2 -  ret %struct.float64x1x3_t %.fca.0.2.insert -} - -define %struct.int8x16x4_t @test_vld4q_s8(i8* readonly %a) { -; CHECK: test_vld4q_s8 -; CHECK: ld4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [x{{[0-9]+|sp}}] -  %vld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8* %a, i32 1) -  %vld4.fca.0.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.int8x16x4_t undef, <16 x i8> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int8x16x4_t %.fca.0.0.insert, <16 x i8> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int8x16x4_t %.fca.0.1.insert, <16 x i8> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.int8x16x4_t %.fca.0.2.insert, <16 x i8> %vld4.fca.3.extract, 0, 3 -  ret %struct.int8x16x4_t %.fca.0.3.insert -} - -define %struct.int16x8x4_t @test_vld4q_s16(i16* readonly %a) { -; CHECK: test_vld4q_s16 -; CHECK: ld4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8* %1, i32 2) -  %vld4.fca.0.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.int16x8x4_t undef, <8 x i16> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int16x8x4_t %.fca.0.0.insert, <8 x i16> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int16x8x4_t %.fca.0.1.insert, <8 x i16> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.int16x8x4_t %.fca.0.2.insert, <8 x i16> %vld4.fca.3.extract, 0, 3 -  ret %struct.int16x8x4_t %.fca.0.3.insert -} - -define %struct.int32x4x4_t @test_vld4q_s32(i32* readonly %a) { -; CHECK: test_vld4q_s32 -; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  %vld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8* %1, i32 4) -  %vld4.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.int32x4x4_t undef, <4 x i32> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int32x4x4_t %.fca.0.0.insert, <4 x i32> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int32x4x4_t %.fca.0.1.insert, <4 x i32> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.int32x4x4_t %.fca.0.2.insert, <4 x i32> %vld4.fca.3.extract, 0, 3 -  ret %struct.int32x4x4_t %.fca.0.3.insert -} - -define %struct.int64x2x4_t @test_vld4q_s64(i64* readonly %a) { -; CHECK: test_vld4q_s64 -; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  %vld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8* %1, i32 8) -  %vld4.fca.0.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.int64x2x4_t undef, <2 x i64> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int64x2x4_t %.fca.0.0.insert, <2 x i64> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int64x2x4_t %.fca.0.1.insert, <2 x i64> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.int64x2x4_t %.fca.0.2.insert, <2 x i64> %vld4.fca.3.extract, 0, 3 -  ret %struct.int64x2x4_t %.fca.0.3.insert -} - -define %struct.float32x4x4_t @test_vld4q_f32(float* readonly %a) { -; CHECK: test_vld4q_f32 -; CHECK: ld4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [x{{[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  %vld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8* %1, i32 4) -  %vld4.fca.0.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.float32x4x4_t undef, <4 x float> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float32x4x4_t %.fca.0.0.insert, <4 x float> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.float32x4x4_t %.fca.0.1.insert, <4 x float> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.float32x4x4_t %.fca.0.2.insert, <4 x float> %vld4.fca.3.extract, 0, 3 -  ret %struct.float32x4x4_t %.fca.0.3.insert -} - -define %struct.float64x2x4_t @test_vld4q_f64(double* readonly %a) { -; CHECK: test_vld4q_f64 -; CHECK: ld4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [x{{[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  %vld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8* %1, i32 8) -  %vld4.fca.0.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.float64x2x4_t undef, <2 x double> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float64x2x4_t %.fca.0.0.insert, <2 x double> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.float64x2x4_t %.fca.0.1.insert, <2 x double> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.float64x2x4_t %.fca.0.2.insert, <2 x double> %vld4.fca.3.extract, 0, 3 -  ret %struct.float64x2x4_t %.fca.0.3.insert -} - -define %struct.int8x8x4_t @test_vld4_s8(i8* readonly %a) { -; CHECK: test_vld4_s8 -; CHECK: ld4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [x{{[0-9]+|sp}}] -  %vld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8* %a, i32 1) -  %vld4.fca.0.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.int8x8x4_t undef, <8 x i8> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int8x8x4_t %.fca.0.0.insert, <8 x i8> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int8x8x4_t %.fca.0.1.insert, <8 x i8> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.int8x8x4_t %.fca.0.2.insert, <8 x i8> %vld4.fca.3.extract, 0, 3 -  ret %struct.int8x8x4_t %.fca.0.3.insert -} - -define %struct.int16x4x4_t @test_vld4_s16(i16* readonly %a) { -; CHECK: test_vld4_s16 -; CHECK: ld4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [x{{[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  %vld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8* %1, i32 2) -  %vld4.fca.0.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.int16x4x4_t undef, <4 x i16> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int16x4x4_t %.fca.0.0.insert, <4 x i16> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int16x4x4_t %.fca.0.1.insert, <4 x i16> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.int16x4x4_t %.fca.0.2.insert, <4 x i16> %vld4.fca.3.extract, 0, 3 -  ret %struct.int16x4x4_t %.fca.0.3.insert -} - -define %struct.int32x2x4_t @test_vld4_s32(i32* readonly %a) { -; CHECK: test_vld4_s32 -; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  %vld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8* %1, i32 4) -  %vld4.fca.0.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.int32x2x4_t undef, <2 x i32> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int32x2x4_t %.fca.0.0.insert, <2 x i32> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int32x2x4_t %.fca.0.1.insert, <2 x i32> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.int32x2x4_t %.fca.0.2.insert, <2 x i32> %vld4.fca.3.extract, 0, 3 -  ret %struct.int32x2x4_t %.fca.0.3.insert -} - -define %struct.int64x1x4_t @test_vld4_s64(i64* readonly %a) { -; CHECK: test_vld4_s64 -; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  %vld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8* %1, i32 8) -  %vld4.fca.0.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.int64x1x4_t undef, <1 x i64> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.int64x1x4_t %.fca.0.0.insert, <1 x i64> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.int64x1x4_t %.fca.0.1.insert, <1 x i64> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.int64x1x4_t %.fca.0.2.insert, <1 x i64> %vld4.fca.3.extract, 0, 3 -  ret %struct.int64x1x4_t %.fca.0.3.insert -} - -define %struct.float32x2x4_t @test_vld4_f32(float* readonly %a) { -; CHECK: test_vld4_f32 -; CHECK: ld4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [x{{[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  %vld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8* %1, i32 4) -  %vld4.fca.0.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.float32x2x4_t undef, <2 x float> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float32x2x4_t %.fca.0.0.insert, <2 x float> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.float32x2x4_t %.fca.0.1.insert, <2 x float> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.float32x2x4_t %.fca.0.2.insert, <2 x float> %vld4.fca.3.extract, 0, 3 -  ret %struct.float32x2x4_t %.fca.0.3.insert -} - -define %struct.float64x1x4_t @test_vld4_f64(double* readonly %a) { -; CHECK: test_vld4_f64 -; CHECK: ld1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [x{{[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  %vld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8* %1, i32 8) -  %vld4.fca.0.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 0 -  %vld4.fca.1.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 1 -  %vld4.fca.2.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 2 -  %vld4.fca.3.extract = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vld4, 3 -  %.fca.0.0.insert = insertvalue %struct.float64x1x4_t undef, <1 x double> %vld4.fca.0.extract, 0, 0 -  %.fca.0.1.insert = insertvalue %struct.float64x1x4_t %.fca.0.0.insert, <1 x double> %vld4.fca.1.extract, 0, 1 -  %.fca.0.2.insert = insertvalue %struct.float64x1x4_t %.fca.0.1.insert, <1 x double> %vld4.fca.2.extract, 0, 2 -  %.fca.0.3.insert = insertvalue %struct.float64x1x4_t %.fca.0.2.insert, <1 x double> %vld4.fca.3.extract, 0, 3 -  ret %struct.float64x1x4_t %.fca.0.3.insert -} - -declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) -declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) -declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) -declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32) -declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) -declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) -declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) -declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) -declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) -declare <1 x double> @llvm.arm.neon.vld1.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64> } @llvm.arm.neon.vld2.v2i64(i8*, i32) -declare { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32(i8*, i32) -declare { <2 x double>, <2 x double> } @llvm.arm.neon.vld2.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64(i8*, i32) -declare { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32(i8*, i32) -declare { <1 x double>, <1 x double> } @llvm.arm.neon.vld2.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld3.v2i64(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*, i32) -declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld3.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32(i8*, i32) -declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld3.v1f64(i8*, i32) -declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32) -declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16(i8*, i32) -declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32(i8*, i32) -declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm.neon.vld4.v2i64(i8*, i32) -declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32(i8*, i32) -declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm.neon.vld4.v2f64(i8*, i32) -declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8(i8*, i32) -declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16(i8*, i32) -declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32(i8*, i32) -declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64(i8*, i32) -declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32(i8*, i32) -declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm.neon.vld4.v1f64(i8*, i32) - -define void @test_vst1q_s8(i8* %a, <16 x i8> %b) { -; CHECK: test_vst1q_s8 -; CHECK: st1 {v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -  tail call void @llvm.arm.neon.vst1.v16i8(i8* %a, <16 x i8> %b, i32 1) -  ret void -} - -define void @test_vst1q_s16(i16* %a, <8 x i16> %b) { -; CHECK: test_vst1q_s16 -; CHECK: st1 {v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  tail call void @llvm.arm.neon.vst1.v8i16(i8* %1, <8 x i16> %b, i32 2) -  ret void -} - -define void @test_vst1q_s32(i32* %a, <4 x i32> %b) { -; CHECK: test_vst1q_s32 -; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  tail call void @llvm.arm.neon.vst1.v4i32(i8* %1, <4 x i32> %b, i32 4) -  ret void -} - -define void @test_vst1q_s64(i64* %a, <2 x i64> %b) { -; CHECK: test_vst1q_s64 -; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  tail call void @llvm.arm.neon.vst1.v2i64(i8* %1, <2 x i64> %b, i32 8) -  ret void -} - -define void @test_vst1q_f32(float* %a, <4 x float> %b) { -; CHECK: test_vst1q_f32 -; CHECK: st1 {v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  tail call void @llvm.arm.neon.vst1.v4f32(i8* %1, <4 x float> %b, i32 4) -  ret void -} - -define void @test_vst1q_f64(double* %a, <2 x double> %b) { -; CHECK: test_vst1q_f64 -; CHECK: st1 {v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  tail call void @llvm.arm.neon.vst1.v2f64(i8* %1, <2 x double> %b, i32 8) -  ret void -} - -define void @test_vst1_s8(i8* %a, <8 x i8> %b) { -; CHECK: test_vst1_s8 -; CHECK: st1 {v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] -  tail call void @llvm.arm.neon.vst1.v8i8(i8* %a, <8 x i8> %b, i32 1) -  ret void -} - -define void @test_vst1_s16(i16* %a, <4 x i16> %b) { -; CHECK: test_vst1_s16 -; CHECK: st1 {v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] -  %1 = bitcast i16* %a to i8* -  tail call void @llvm.arm.neon.vst1.v4i16(i8* %1, <4 x i16> %b, i32 2) -  ret void -} - -define void @test_vst1_s32(i32* %a, <2 x i32> %b) { -; CHECK: test_vst1_s32 -; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -  %1 = bitcast i32* %a to i8* -  tail call void @llvm.arm.neon.vst1.v2i32(i8* %1, <2 x i32> %b, i32 4) -  ret void -} - -define void @test_vst1_s64(i64* %a, <1 x i64> %b) { -; CHECK: test_vst1_s64 -; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] -  %1 = bitcast i64* %a to i8* -  tail call void @llvm.arm.neon.vst1.v1i64(i8* %1, <1 x i64> %b, i32 8) -  ret void -} - -define void @test_vst1_f32(float* %a, <2 x float> %b) { -; CHECK: test_vst1_f32 -; CHECK: st1 {v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -  %1 = bitcast float* %a to i8* -  tail call void @llvm.arm.neon.vst1.v2f32(i8* %1, <2 x float> %b, i32 4) -  ret void -} - -define void @test_vst1_f64(double* %a, <1 x double> %b) { -; CHECK: test_vst1_f64 -; CHECK: st1 {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] -  %1 = bitcast double* %a to i8* -  tail call void @llvm.arm.neon.vst1.v1f64(i8* %1, <1 x double> %b, i32 8) -  ret void -} - -define void @test_vst2q_s8(i8* %a, [2 x <16 x i8>] %b.coerce) { -; CHECK: test_vst2q_s8 -; CHECK: st2 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <16 x i8>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <16 x i8>] %b.coerce, 1 -  tail call void @llvm.arm.neon.vst2.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, i32 1) -  ret void -} - -define void @test_vst2q_s16(i16* %a, [2 x <8 x i16>] %b.coerce) { -; CHECK: test_vst2q_s16 -; CHECK: st2 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <8 x i16>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <8 x i16>] %b.coerce, 1 -  %1 = bitcast i16* %a to i8* -  tail call void @llvm.arm.neon.vst2.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, i32 2) -  ret void -} - -define void @test_vst2q_s32(i32* %a, [2 x <4 x i32>] %b.coerce) { -; CHECK: test_vst2q_s32 -; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <4 x i32>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <4 x i32>] %b.coerce, 1 -  %1 = bitcast i32* %a to i8* -  tail call void @llvm.arm.neon.vst2.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, i32 4) -  ret void -} - -define void @test_vst2q_s64(i64* %a, [2 x <2 x i64>] %b.coerce) { -; CHECK: test_vst2q_s64 -; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <2 x i64>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <2 x i64>] %b.coerce, 1 -  %1 = bitcast i64* %a to i8* -  tail call void @llvm.arm.neon.vst2.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, i32 8) -  ret void -} - -define void @test_vst2q_f32(float* %a, [2 x <4 x float>] %b.coerce) { -; CHECK: test_vst2q_f32 -; CHECK: st2 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <4 x float>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <4 x float>] %b.coerce, 1 -  %1 = bitcast float* %a to i8* -  tail call void @llvm.arm.neon.vst2.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, i32 4) -  ret void -} - -define void @test_vst2q_f64(double* %a, [2 x <2 x double>] %b.coerce) { -; CHECK: test_vst2q_f64 -; CHECK: st2 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <2 x double>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <2 x double>] %b.coerce, 1 -  %1 = bitcast double* %a to i8* -  tail call void @llvm.arm.neon.vst2.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, i32 8) -  ret void -} - -define void @test_vst2_s8(i8* %a, [2 x <8 x i8>] %b.coerce) { -; CHECK: test_vst2_s8 -; CHECK: st2 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %b.coerce, 1 -  tail call void @llvm.arm.neon.vst2.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, i32 1) -  ret void -} - -define void @test_vst2_s16(i16* %a, [2 x <4 x i16>] %b.coerce) { -; CHECK: test_vst2_s16 -; CHECK: st2 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <4 x i16>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <4 x i16>] %b.coerce, 1 -  %1 = bitcast i16* %a to i8* -  tail call void @llvm.arm.neon.vst2.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, i32 2) -  ret void -} - -define void @test_vst2_s32(i32* %a, [2 x <2 x i32>] %b.coerce) { -; CHECK: test_vst2_s32 -; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <2 x i32>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <2 x i32>] %b.coerce, 1 -  %1 = bitcast i32* %a to i8* -  tail call void @llvm.arm.neon.vst2.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, i32 4) -  ret void -} - -define void @test_vst2_s64(i64* %a, [2 x <1 x i64>] %b.coerce) { -; CHECK: test_vst2_s64 -; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <1 x i64>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <1 x i64>] %b.coerce, 1 -  %1 = bitcast i64* %a to i8* -  tail call void @llvm.arm.neon.vst2.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, i32 8) -  ret void -} - -define void @test_vst2_f32(float* %a, [2 x <2 x float>] %b.coerce) { -; CHECK: test_vst2_f32 -; CHECK: st2 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <2 x float>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <2 x float>] %b.coerce, 1 -  %1 = bitcast float* %a to i8* -  tail call void @llvm.arm.neon.vst2.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, i32 4) -  ret void -} - -define void @test_vst2_f64(double* %a, [2 x <1 x double>] %b.coerce) { -; CHECK: test_vst2_f64 -; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [2 x <1 x double>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [2 x <1 x double>] %b.coerce, 1 -  %1 = bitcast double* %a to i8* -  tail call void @llvm.arm.neon.vst2.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, i32 8) -  ret void -} - -define void @test_vst3q_s8(i8* %a, [3 x <16 x i8>] %b.coerce) { -; CHECK: test_vst3q_s8 -; CHECK: st3 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <16 x i8>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <16 x i8>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <16 x i8>] %b.coerce, 2 -  tail call void @llvm.arm.neon.vst3.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, i32 1) -  ret void -} - -define void @test_vst3q_s16(i16* %a, [3 x <8 x i16>] %b.coerce) { -; CHECK: test_vst3q_s16 -; CHECK: st3 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <8 x i16>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <8 x i16>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <8 x i16>] %b.coerce, 2 -  %1 = bitcast i16* %a to i8* -  tail call void @llvm.arm.neon.vst3.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, i32 2) -  ret void -} - -define void @test_vst3q_s32(i32* %a, [3 x <4 x i32>] %b.coerce) { -; CHECK: test_vst3q_s32 -; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <4 x i32>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <4 x i32>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <4 x i32>] %b.coerce, 2 -  %1 = bitcast i32* %a to i8* -  tail call void @llvm.arm.neon.vst3.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, i32 4) -  ret void -} - -define void @test_vst3q_s64(i64* %a, [3 x <2 x i64>] %b.coerce) { -; CHECK: test_vst3q_s64 -; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <2 x i64>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <2 x i64>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <2 x i64>] %b.coerce, 2 -  %1 = bitcast i64* %a to i8* -  tail call void @llvm.arm.neon.vst3.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, i32 8) -  ret void -} - -define void @test_vst3q_f32(float* %a, [3 x <4 x float>] %b.coerce) { -; CHECK: test_vst3q_f32 -; CHECK: st3 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <4 x float>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <4 x float>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <4 x float>] %b.coerce, 2 -  %1 = bitcast float* %a to i8* -  tail call void @llvm.arm.neon.vst3.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, i32 4) -  ret void -} - -define void @test_vst3q_f64(double* %a, [3 x <2 x double>] %b.coerce) { -; CHECK: test_vst3q_f64 -; CHECK: st3 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <2 x double>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <2 x double>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <2 x double>] %b.coerce, 2 -  %1 = bitcast double* %a to i8* -  tail call void @llvm.arm.neon.vst3.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, i32 8) -  ret void -} - -define void @test_vst3_s8(i8* %a, [3 x <8 x i8>] %b.coerce) { -; CHECK: test_vst3_s8 -; CHECK: st3 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <8 x i8>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <8 x i8>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <8 x i8>] %b.coerce, 2 -  tail call void @llvm.arm.neon.vst3.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, i32 1) -  ret void -} - -define void @test_vst3_s16(i16* %a, [3 x <4 x i16>] %b.coerce) { -; CHECK: test_vst3_s16 -; CHECK: st3 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <4 x i16>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <4 x i16>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <4 x i16>] %b.coerce, 2 -  %1 = bitcast i16* %a to i8* -  tail call void @llvm.arm.neon.vst3.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, i32 2) -  ret void -} - -define void @test_vst3_s32(i32* %a, [3 x <2 x i32>] %b.coerce) { -; CHECK: test_vst3_s32 -; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <2 x i32>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <2 x i32>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <2 x i32>] %b.coerce, 2 -  %1 = bitcast i32* %a to i8* -  tail call void @llvm.arm.neon.vst3.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, i32 4) -  ret void -} - -define void @test_vst3_s64(i64* %a, [3 x <1 x i64>] %b.coerce) { -; CHECK: test_vst3_s64 -; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <1 x i64>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <1 x i64>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <1 x i64>] %b.coerce, 2 -  %1 = bitcast i64* %a to i8* -  tail call void @llvm.arm.neon.vst3.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, i32 8) -  ret void -} - -define void @test_vst3_f32(float* %a, [3 x <2 x float>] %b.coerce) { -; CHECK: test_vst3_f32 -; CHECK: st3 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <2 x float>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <2 x float>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <2 x float>] %b.coerce, 2 -  %1 = bitcast float* %a to i8* -  tail call void @llvm.arm.neon.vst3.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, i32 4) -  ret void -} - -define void @test_vst3_f64(double* %a, [3 x <1 x double>] %b.coerce) { -; CHECK: test_vst3_f64 -; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [3 x <1 x double>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [3 x <1 x double>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [3 x <1 x double>] %b.coerce, 2 -  %1 = bitcast double* %a to i8* -  tail call void @llvm.arm.neon.vst3.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, i32 8) -  ret void -} - -define void @test_vst4q_s8(i8* %a, [4 x <16 x i8>] %b.coerce) { -; CHECK: test_vst4q_s8 -; CHECK: st4 {v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <16 x i8>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <16 x i8>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <16 x i8>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <16 x i8>] %b.coerce, 3 -  tail call void @llvm.arm.neon.vst4.v16i8(i8* %a, <16 x i8> %b.coerce.fca.0.extract, <16 x i8> %b.coerce.fca.1.extract, <16 x i8> %b.coerce.fca.2.extract, <16 x i8> %b.coerce.fca.3.extract, i32 1) -  ret void -} - -define void @test_vst4q_s16(i16* %a, [4 x <8 x i16>] %b.coerce) { -; CHECK: test_vst4q_s16 -; CHECK: st4 {v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <8 x i16>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <8 x i16>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <8 x i16>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <8 x i16>] %b.coerce, 3 -  %1 = bitcast i16* %a to i8* -  tail call void @llvm.arm.neon.vst4.v8i16(i8* %1, <8 x i16> %b.coerce.fca.0.extract, <8 x i16> %b.coerce.fca.1.extract, <8 x i16> %b.coerce.fca.2.extract, <8 x i16> %b.coerce.fca.3.extract, i32 2) -  ret void -} - -define void @test_vst4q_s32(i32* %a, [4 x <4 x i32>] %b.coerce) { -; CHECK: test_vst4q_s32 -; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <4 x i32>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <4 x i32>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <4 x i32>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <4 x i32>] %b.coerce, 3 -  %1 = bitcast i32* %a to i8* -  tail call void @llvm.arm.neon.vst4.v4i32(i8* %1, <4 x i32> %b.coerce.fca.0.extract, <4 x i32> %b.coerce.fca.1.extract, <4 x i32> %b.coerce.fca.2.extract, <4 x i32> %b.coerce.fca.3.extract, i32 4) -  ret void -} - -define void @test_vst4q_s64(i64* %a, [4 x <2 x i64>] %b.coerce) { -; CHECK: test_vst4q_s64 -; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <2 x i64>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <2 x i64>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <2 x i64>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <2 x i64>] %b.coerce, 3 -  %1 = bitcast i64* %a to i8* -  tail call void @llvm.arm.neon.vst4.v2i64(i8* %1, <2 x i64> %b.coerce.fca.0.extract, <2 x i64> %b.coerce.fca.1.extract, <2 x i64> %b.coerce.fca.2.extract, <2 x i64> %b.coerce.fca.3.extract, i32 8) -  ret void -} - -define void @test_vst4q_f32(float* %a, [4 x <4 x float>] %b.coerce) { -; CHECK: test_vst4q_f32 -; CHECK: st4 {v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <4 x float>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <4 x float>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <4 x float>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <4 x float>] %b.coerce, 3 -  %1 = bitcast float* %a to i8* -  tail call void @llvm.arm.neon.vst4.v4f32(i8* %1, <4 x float> %b.coerce.fca.0.extract, <4 x float> %b.coerce.fca.1.extract, <4 x float> %b.coerce.fca.2.extract, <4 x float> %b.coerce.fca.3.extract, i32 4) -  ret void -} - -define void @test_vst4q_f64(double* %a, [4 x <2 x double>] %b.coerce) { -; CHECK: test_vst4q_f64 -; CHECK: st4 {v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <2 x double>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <2 x double>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <2 x double>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <2 x double>] %b.coerce, 3 -  %1 = bitcast double* %a to i8* -  tail call void @llvm.arm.neon.vst4.v2f64(i8* %1, <2 x double> %b.coerce.fca.0.extract, <2 x double> %b.coerce.fca.1.extract, <2 x double> %b.coerce.fca.2.extract, <2 x double> %b.coerce.fca.3.extract, i32 8) -  ret void -} - -define void @test_vst4_s8(i8* %a, [4 x <8 x i8>] %b.coerce) { -; CHECK: test_vst4_s8 -; CHECK: st4 {v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <8 x i8>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <8 x i8>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <8 x i8>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <8 x i8>] %b.coerce, 3 -  tail call void @llvm.arm.neon.vst4.v8i8(i8* %a, <8 x i8> %b.coerce.fca.0.extract, <8 x i8> %b.coerce.fca.1.extract, <8 x i8> %b.coerce.fca.2.extract, <8 x i8> %b.coerce.fca.3.extract, i32 1) -  ret void -} - -define void @test_vst4_s16(i16* %a, [4 x <4 x i16>] %b.coerce) { -; CHECK: test_vst4_s16 -; CHECK: st4 {v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <4 x i16>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <4 x i16>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <4 x i16>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <4 x i16>] %b.coerce, 3 -  %1 = bitcast i16* %a to i8* -  tail call void @llvm.arm.neon.vst4.v4i16(i8* %1, <4 x i16> %b.coerce.fca.0.extract, <4 x i16> %b.coerce.fca.1.extract, <4 x i16> %b.coerce.fca.2.extract, <4 x i16> %b.coerce.fca.3.extract, i32 2) -  ret void -} - -define void @test_vst4_s32(i32* %a, [4 x <2 x i32>] %b.coerce) { -; CHECK: test_vst4_s32 -; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <2 x i32>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <2 x i32>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <2 x i32>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <2 x i32>] %b.coerce, 3 -  %1 = bitcast i32* %a to i8* -  tail call void @llvm.arm.neon.vst4.v2i32(i8* %1, <2 x i32> %b.coerce.fca.0.extract, <2 x i32> %b.coerce.fca.1.extract, <2 x i32> %b.coerce.fca.2.extract, <2 x i32> %b.coerce.fca.3.extract, i32 4) -  ret void -} - -define void @test_vst4_s64(i64* %a, [4 x <1 x i64>] %b.coerce) { -; CHECK: test_vst4_s64 -; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <1 x i64>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <1 x i64>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <1 x i64>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <1 x i64>] %b.coerce, 3 -  %1 = bitcast i64* %a to i8* -  tail call void @llvm.arm.neon.vst4.v1i64(i8* %1, <1 x i64> %b.coerce.fca.0.extract, <1 x i64> %b.coerce.fca.1.extract, <1 x i64> %b.coerce.fca.2.extract, <1 x i64> %b.coerce.fca.3.extract, i32 8) -  ret void -} - -define void @test_vst4_f32(float* %a, [4 x <2 x float>] %b.coerce) { -; CHECK: test_vst4_f32 -; CHECK: st4 {v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <2 x float>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <2 x float>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <2 x float>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <2 x float>] %b.coerce, 3 -  %1 = bitcast float* %a to i8* -  tail call void @llvm.arm.neon.vst4.v2f32(i8* %1, <2 x float> %b.coerce.fca.0.extract, <2 x float> %b.coerce.fca.1.extract, <2 x float> %b.coerce.fca.2.extract, <2 x float> %b.coerce.fca.3.extract, i32 4) -  ret void -} - -define void @test_vst4_f64(double* %a, [4 x <1 x double>] %b.coerce) { -; CHECK: test_vst4_f64 -; CHECK: st1 {v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d, v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] -  %b.coerce.fca.0.extract = extractvalue [4 x <1 x double>] %b.coerce, 0 -  %b.coerce.fca.1.extract = extractvalue [4 x <1 x double>] %b.coerce, 1 -  %b.coerce.fca.2.extract = extractvalue [4 x <1 x double>] %b.coerce, 2 -  %b.coerce.fca.3.extract = extractvalue [4 x <1 x double>] %b.coerce, 3 -  %1 = bitcast double* %a to i8* -  tail call void @llvm.arm.neon.vst4.v1f64(i8* %1, <1 x double> %b.coerce.fca.0.extract, <1 x double> %b.coerce.fca.1.extract, <1 x double> %b.coerce.fca.2.extract, <1 x double> %b.coerce.fca.3.extract, i32 8) -  ret void -} - -declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) -declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) -declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) -declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32) -declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) -declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) -declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) -declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) -declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) -declare void @llvm.arm.neon.vst1.v1f64(i8*, <1 x double>, i32) -declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst2.v2i64(i8*, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst2.v2f64(i8*, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) -declare void @llvm.arm.neon.vst2.v1f64(i8*, <1 x double>, <1 x double>, i32) -declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst3.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst3.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) -declare void @llvm.arm.neon.vst3.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, i32) -declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) -declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) -declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) -declare void @llvm.arm.neon.vst4.v2i64(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i32) -declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) -declare void @llvm.arm.neon.vst4.v2f64(i8*, <2 x double>, <2 x double>, <2 x double>, <2 x double>, i32) -declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) -declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) -declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) -declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) -declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) -declare void @llvm.arm.neon.vst4.v1f64(i8*, <1 x double>, <1 x double>, <1 x double>, <1 x double>, i32)
\ No newline at end of file diff --git a/test/MC/AArch64/neon-diagnostics.s b/test/MC/AArch64/neon-diagnostics.s index 086d487..9127ed8 100644 --- a/test/MC/AArch64/neon-diagnostics.s +++ b/test/MC/AArch64/neon-diagnostics.s @@ -3880,224 +3880,3 @@  // CHECK-ERROR: error: invalid operand for instruction  // CHECK-ERROR:          frsqrts d8, s22, d18  // CHECK-ERROR:                      ^ - -//---------------------------------------------------------------------- -// Vector load/store multiple N-element structure (class SIMD lselem) -//---------------------------------------------------------------------- -         ld1 {x3}, [x2] -         ld1 {v4}, [x0] -         ld1 {v32.16b}, [x0] -         ld1 {v15.8h}, [x32] -// CHECK-ERROR: error: expected vector type register -// CHECK-ERROR:        ld1 {x3}, [x2] -// CHECK-ERROR:             ^ -// CHECK-ERROR: error: expected vector type register -// CHECK-ERROR:        ld1 {v4}, [x0] -// CHECK-ERROR:             ^ -// CHECK-ERROR: error: expected vector type register -// CHECK-ERROR:        ld1 {v32.16b}, [x0] -// CHECK-ERROR:             ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        ld1 {v15.8h}, [x32] -// CHECK-ERROR:                       ^ - -         ld1 {v0.16b, v2.16b}, [x0] -         ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] -         ld1 v0.8b, v1.8b}, [x0] -         ld1 {v0.8h-v4.8h}, [x0] -         ld1 {v1.8h-v1.8h}, [x0] -         ld1 {v15.8h-v17.4h}, [x15] -         ld1 {v0.8b-v2.8b, [x0] -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        ld1 {v0.16b, v2.16b}, [x0] -// CHECK-ERROR:                     ^ -// CHECK-ERROR: error: invalid number of vectors -// CHECK-ERROR:        ld1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] -// CHECK-ERROR:                                         ^ -// CHECK-ERROR: error: '{' expected -// CHECK-ERROR:        ld1 v0.8b, v1.8b}, [x0] -// CHECK-ERROR:            ^ -// CHECK-ERROR: error: invalid number of vectors -// CHECK-ERROR:        ld1 {v0.8h-v4.8h}, [x0] -// CHECK-ERROR:                   ^ -// CHECK-ERROR: error: invalid number of vectors -// CHECK-ERROR:        ld1 {v1.8h-v1.8h}, [x0] -// CHECK-ERROR:                   ^ -// CHECK-ERROR: error: expected the same vector layout -// CHECK-ERROR:        ld1 {v15.8h-v17.4h}, [x15] -// CHECK-ERROR:                        ^ -// CHECK-ERROR: error: '}' expected -// CHECK-ERROR:        ld1 {v0.8b-v2.8b, [x0] -// CHECK-ERROR:                        ^ - -         ld2 {v15.8h, v16.4h}, [x15] -         ld2 {v0.8b, v2.8b}, [x0] -         ld2 {v15.4h, v16.4h, v17.4h}, [x32] -         ld2 {v15.8h-v16.4h}, [x15] -         ld2 {v0.2d-v2.2d}, [x0] -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        ld2 {v15.8h, v16.4h}, [x15] -// CHECK-ERROR:                     ^ -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        ld2 {v0.8b, v2.8b}, [x0] -// CHECK-ERROR:                    ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        ld2 {v15.4h, v16.4h, v17.4h}, [x32] -// CHECK-ERROR:            ^ -// CHECK-ERROR: error: expected the same vector layout -// CHECK-ERROR:        ld2 {v15.8h-v16.4h}, [x15] -// CHECK-ERROR:                        ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        ld2 {v0.2d-v2.2d}, [x0] -// CHECK-ERROR:            ^ - -         ld3 {v15.8h, v16.8h, v17.4h}, [x15] -         ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] -         ld3 {v0.8b, v2.8b, v3.8b}, [x0] -         ld3 {v15.8h-v17.4h}, [x15] -         ld3 {v31.4s-v2.4s}, [sp] -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        ld3 {v15.8h, v16.8h, v17.4h}, [x15] -// CHECK-ERROR:                             ^ -// CHECK-ERROR: error: expected vector type register -// CHECK-ERROR:        ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] -// CHECK-ERROR:                    ^ -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        ld3 {v0.8b, v2.8b, v3.8b}, [x0] -// CHECK-ERROR:                    ^ -// CHECK-ERROR: error: expected the same vector layout -// CHECK-ERROR:        ld3 {v15.8h-v17.4h}, [x15] -// CHECK-ERROR:                        ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        ld3 {v31.4s-v2.4s}, [sp] -// CHECK-ERROR:            ^ - -         ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] -         ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] -         ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] -         ld4 {v15.8h-v18.4h}, [x15] -         ld4 {v31.2s-v1.2s}, [x31] -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        ld4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] -// CHECK-ERROR:                             ^ -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] -// CHECK-ERROR:                    ^ -// CHECK-ERROR: error: invalid number of vectors -// CHECK-ERROR:        ld4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] -// CHECK-ERROR:                                             ^ -// CHECK-ERROR: error: expected the same vector layout -// CHECK-ERROR:        ld4 {v15.8h-v18.4h}, [x15] -// CHECK-ERROR:                        ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        ld4 {v31.2s-v1.2s}, [x31] -// CHECK-ERROR:            ^ - -         st1 {x3}, [x2] -         st1 {v4}, [x0] -         st1 {v32.16b}, [x0] -         st1 {v15.8h}, [x32] -// CHECK-ERROR: error: expected vector type register -// CHECK-ERROR:        st1 {x3}, [x2] -// CHECK-ERROR:             ^ -// CHECK-ERROR: error: expected vector type register -// CHECK-ERROR:        st1 {v4}, [x0] -// CHECK-ERROR:             ^ -// CHECK-ERROR: error: expected vector type register -// CHECK-ERROR:        st1 {v32.16b}, [x0] -// CHECK-ERROR:             ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        st1 {v15.8h}, [x32] -// CHECK-ERROR:                       ^ - -         st1 {v0.16b, v2.16b}, [x0] -         st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] -         st1 v0.8b, v1.8b}, [x0] -         st1 {v0.8h-v4.8h}, [x0] -         st1 {v1.8h-v1.8h}, [x0] -         st1 {v15.8h-v17.4h}, [x15] -         st1 {v0.8b-v2.8b, [x0] -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        st1 {v0.16b, v2.16b}, [x0] -// CHECK-ERROR:                     ^ -// CHECK-ERROR: error: invalid number of vectors -// CHECK-ERROR:        st1 {v0.8h, v1.8h, v2.8h, v3.8h, v4.8h}, [x0] -// CHECK-ERROR:                                         ^ -// CHECK-ERROR: error: '{' expected -// CHECK-ERROR:        st1 v0.8b, v1.8b}, [x0] -// CHECK-ERROR:            ^ -// CHECK-ERROR: error: invalid number of vectors -// CHECK-ERROR:        st1 {v0.8h-v4.8h}, [x0] -// CHECK-ERROR:                   ^ -// CHECK-ERROR: error: invalid number of vectors -// CHECK-ERROR:        st1 {v1.8h-v1.8h}, [x0] -// CHECK-ERROR:                   ^ -// CHECK-ERROR: error: expected the same vector layout -// CHECK-ERROR:        st1 {v15.8h-v17.4h}, [x15] -// CHECK-ERROR:                        ^ -// CHECK-ERROR: error: '}' expected -// CHECK-ERROR:        st1 {v0.8b-v2.8b, [x0] -// CHECK-ERROR:                        ^ - -         st2 {v15.8h, v16.4h}, [x15] -         st2 {v0.8b, v2.8b}, [x0] -         st2 {v15.4h, v16.4h, v17.4h}, [x30] -         st2 {v15.8h-v16.4h}, [x15] -         st2 {v0.2d-v2.2d}, [x0] -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        st2 {v15.8h, v16.4h}, [x15] -// CHECK-ERROR:                     ^ -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        st2 {v0.8b, v2.8b}, [x0] -// CHECK-ERROR:                    ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        st2 {v15.4h, v16.4h, v17.4h}, [x30] -// CHECK-ERROR:            ^ -// CHECK-ERROR: error: expected the same vector layout -// CHECK-ERROR:        st2 {v15.8h-v16.4h}, [x15] -// CHECK-ERROR:                        ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        st2 {v0.2d-v2.2d}, [x0] -// CHECK-ERROR:            ^ - -         st3 {v15.8h, v16.8h, v17.4h}, [x15] -         st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] -         st3 {v0.8b, v2.8b, v3.8b}, [x0] -         st3 {v15.8h-v17.4h}, [x15] -         st3 {v31.4s-v2.4s}, [sp] -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        st3 {v15.8h, v16.8h, v17.4h}, [x15] -// CHECK-ERROR:                             ^ -// CHECK-ERROR: error: expected vector type register -// CHECK-ERROR:        st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] -// CHECK-ERROR:                    ^ -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        st3 {v0.8b, v2.8b, v3.8b}, [x0] -// CHECK-ERROR:                    ^ -// CHECK-ERROR: error: expected the same vector layout -// CHECK-ERROR:        st3 {v15.8h-v17.4h}, [x15] -// CHECK-ERROR:                        ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        st3 {v31.4s-v2.4s}, [sp] -// CHECK-ERROR:            ^ - -         st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] -         st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] -         st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] -         st4 {v15.8h-v18.4h}, [x15] -         st4 {v31.2s-v1.2s}, [x31] -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] -// CHECK-ERROR:                             ^ -// CHECK-ERROR: error: invalid space between two vectors -// CHECK-ERROR:        st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] -// CHECK-ERROR:                    ^ -// CHECK-ERROR: error: invalid number of vectors -// CHECK-ERROR:        st4 {v15.4h, v16.4h, v17.4h, v18.4h, v19.4h}, [x31] -// CHECK-ERROR:                                             ^ -// CHECK-ERROR: error: expected the same vector layout -// CHECK-ERROR:        st4 {v15.8h-v18.4h}, [x15] -// CHECK-ERROR:                        ^ -// CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR:        st4 {v31.2s-v1.2s}, [x31] -// CHECK-ERROR:            ^ diff --git a/test/MC/AArch64/neon-simd-ldst-multi-elem.s b/test/MC/AArch64/neon-simd-ldst-multi-elem.s deleted file mode 100644 index 05fe4da..0000000 --- a/test/MC/AArch64/neon-simd-ldst-multi-elem.s +++ /dev/null @@ -1,463 +0,0 @@ -// RUN: llvm-mc -triple=aarch64 -mattr=+neon -show-encoding < %s | FileCheck %s - -// Check that the assembler can handle the documented syntax for AArch64 - -//------------------------------------------------------------------------------ -// Store multiple 1-element structures from one register -//------------------------------------------------------------------------------ -         st1 {v0.16b}, [x0] -         st1 {v15.8h}, [x15] -         st1 {v31.4s}, [sp] -         st1 {v0.2d}, [x0] -         st1 {v0.8b}, [x0] -         st1 {v15.4h}, [x15] -         st1 {v31.2s}, [sp] -         st1 {v0.1d}, [x0] -// CHECK:	st1	{v0.16b}, [x0]          // encoding: [0x00,0x70,0x00,0x4c] -// CHECK:	st1	{v15.8h}, [x15]         // encoding: [0xef,0x75,0x00,0x4c] -// CHECK:	st1	{v31.4s}, [sp]          // encoding: [0xff,0x7b,0x00,0x4c] -// CHECK:	st1	{v0.2d}, [x0]           // encoding: [0x00,0x7c,0x00,0x4c] -// CHECK:	st1	{v0.8b}, [x0]           // encoding: [0x00,0x70,0x00,0x0c] -// CHECK:	st1	{v15.4h}, [x15]         // encoding: [0xef,0x75,0x00,0x0c] -// CHECK:	st1	{v31.2s}, [sp]          // encoding: [0xff,0x7b,0x00,0x0c] -// CHECK:	st1	{v0.1d}, [x0]           // encoding: [0x00,0x7c,0x00,0x0c] - -//------------------------------------------------------------------------------ -// Store multiple 1-element structures from two consecutive registers -//------------------------------------------------------------------------------ -         st1 {v0.16b, v1.16b}, [x0] -         st1 {v15.8h, v16.8h}, [x15] -         st1 {v31.4s, v0.4s}, [sp] -         st1 {v0.2d, v1.2d}, [x0] -         st1 {v0.8b, v1.8b}, [x0] -         st1 {v15.4h, v16.4h}, [x15] -         st1 {v31.2s, v0.2s}, [sp] -         st1 {v0.1d, v1.1d}, [x0] -// CHECK:	st1	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xa0,0x00,0x4c] -// CHECK:	st1	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c] -// CHECK:	st1	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xab,0x00,0x4c] -// CHECK:	st1	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xac,0x00,0x4c] -// CHECK:	st1	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xa0,0x00,0x0c] -// CHECK:	st1	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c] -// CHECK:	st1	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xab,0x00,0x0c] -// CHECK:	st1	{v0.1d, v1.1d}, [x0]    // encoding: [0x00,0xac,0x00,0x0c] - -         st1 {v0.16b-v1.16b}, [x0] -         st1 {v15.8h-v16.8h}, [x15] -         st1 {v31.4s-v0.4s}, [sp] -         st1 {v0.2d-v1.2d}, [x0] -         st1 {v0.8b-v1.8b}, [x0] -         st1 {v15.4h-v16.4h}, [x15] -         st1 {v31.2s-v0.2s}, [sp] -         st1 {v0.1d-v1.1d}, [x0] -// CHECK:	st1	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xa0,0x00,0x4c] -// CHECK:	st1	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x00,0x4c] -// CHECK:	st1	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xab,0x00,0x4c] -// CHECK:	st1	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xac,0x00,0x4c] -// CHECK:	st1	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xa0,0x00,0x0c] -// CHECK:	st1	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x00,0x0c] -// CHECK:	st1	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xab,0x00,0x0c] -// CHECK:	st1	{v0.1d, v1.1d}, [x0]    // encoding: [0x00,0xac,0x00,0x0c] - -//------------------------------------------------------------------------------ -// Store multiple 1-element structures from three consecutive registers -//------------------------------------------------------------------------------ -         st1 {v0.16b, v1.16b, v2.16b}, [x0] -         st1 {v15.8h, v16.8h, v17.8h}, [x15] -         st1 {v31.4s, v0.4s, v1.4s}, [sp] -         st1 {v0.2d, v1.2d, v2.2d}, [x0] -         st1 {v0.8b, v1.8b, v2.8b}, [x0] -         st1 {v15.4h, v16.4h, v17.4h}, [x15] -         st1 {v31.2s, v0.2s, v1.2s}, [sp] -         st1 {v0.1d, v1.1d, v2.1d}, [x0] -// CHECK:	st1	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c] -// CHECK:	st1	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c] -// CHECK:	st1	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c] -// CHECK:	st1	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c] -// CHECK:	st1	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c] -// CHECK:	st1	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c] -// CHECK:	st1	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c] -// CHECK:	st1	{v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c] - -         st1 {v0.16b-v2.16b}, [x0] -         st1 {v15.8h-v17.8h}, [x15] -         st1 {v31.4s-v1.4s}, [sp] -         st1 {v0.2d-v2.2d}, [x0] -         st1 {v0.8b-v2.8b}, [x0] -         st1 {v15.4h-v17.4h}, [x15] -         st1 {v31.2s-v1.2s}, [sp] -         st1 {v0.1d-v2.1d}, [x0] -// CHECK:	st1	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x00,0x4c] -// CHECK:	st1	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x00,0x4c] -// CHECK:	st1	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x00,0x4c] -// CHECK:	st1	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x00,0x4c] -// CHECK:	st1	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x00,0x0c] -// CHECK:	st1	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x00,0x0c] -// CHECK:	st1	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x00,0x0c] -// CHECK:	st1	{v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x00,0x0c] - -//------------------------------------------------------------------------------ -// Store multiple 1-element structures from four consecutive registers -//------------------------------------------------------------------------------ -         st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] -         st1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] -         st1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] -         st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] -         st1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] -         st1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] -         st1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] -         st1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] -// CHECK:	st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c] -// CHECK:	st1	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c] -// CHECK:	st1	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c] -// CHECK:	st1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c] -// CHECK:	st1	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c] -// CHECK:	st1	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c] -// CHECK:	st1	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c] -// CHECK:	st1	{v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c] - -         st1 {v0.16b-v3.16b}, [x0] -         st1 {v15.8h-v18.8h}, [x15] -         st1 {v31.4s-v2.4s}, [sp] -         st1 {v0.2d-v3.2d}, [x0] -         st1 {v0.8b-v3.8b}, [x0] -         st1 {v15.4h-v18.4h}, [x15] -         st1 {v31.2s-v2.2s}, [sp] -         st1 {v0.1d-v3.1d}, [x0] -// CHECK:	st1	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x00,0x4c] -// CHECK:	st1	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x00,0x4c] -// CHECK:	st1	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x00,0x4c] -// CHECK:	st1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x00,0x4c] -// CHECK:	st1	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x00,0x0c] -// CHECK:	st1	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x00,0x0c] -// CHECK:	st1	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x00,0x0c] -// CHECK:	st1	{v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x00,0x0c] - -//------------------------------------------------------------------------------ -// Store multiple 2-element structures from two consecutive registers -//------------------------------------------------------------------------------ -         st2 {v0.16b, v1.16b}, [x0] -         st2 {v15.8h, v16.8h}, [x15] -         st2 {v31.4s, v0.4s}, [sp] -         st2 {v0.2d, v1.2d}, [x0] -         st2 {v0.8b, v1.8b}, [x0] -         st2 {v15.4h, v16.4h}, [x15] -         st2 {v31.2s, v0.2s}, [sp] -// CHECK:	st2	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0x80,0x00,0x4c] -// CHECK:	st2	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c] -// CHECK:	st2	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0x8b,0x00,0x4c] -// CHECK:	st2	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0x8c,0x00,0x4c] -// CHECK:	st2	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0x80,0x00,0x0c] -// CHECK:	st2	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c] -// CHECK:	st2	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0x8b,0x00,0x0c] - -         st2 {v0.16b-v1.16b}, [x0] -         st2 {v15.8h-v16.8h}, [x15] -         st2 {v31.4s-v0.4s}, [sp] -         st2 {v0.2d-v1.2d}, [x0] -         st2 {v0.8b-v1.8b}, [x0] -         st2 {v15.4h-v16.4h}, [x15] -         st2 {v31.2s-v0.2s}, [sp] -// CHECK:	st2	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0x80,0x00,0x4c] -// CHECK:	st2	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x00,0x4c] -// CHECK:	st2	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0x8b,0x00,0x4c] -// CHECK:	st2	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0x8c,0x00,0x4c] -// CHECK:	st2	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0x80,0x00,0x0c] -// CHECK:	st2	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x00,0x0c] -// CHECK:	st2	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0x8b,0x00,0x0c] - -//------------------------------------------------------------------------------ -// Store multiple 3-element structures from three consecutive registers -//------------------------------------------------------------------------------ -         st3 {v0.16b, v1.16b, v2.16b}, [x0] -         st3 {v15.8h, v16.8h, v17.8h}, [x15] -         st3 {v31.4s, v0.4s, v1.4s}, [sp] -         st3 {v0.2d, v1.2d, v2.2d}, [x0] -         st3 {v0.8b, v1.8b, v2.8b}, [x0] -         st3 {v15.4h, v16.4h, v17.4h}, [x15] -         st3 {v31.2s, v0.2s, v1.2s}, [sp] -// CHECK:	st3	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c] -// CHECK:	st3	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c] -// CHECK:	st3	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c] -// CHECK:	st3	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c] -// CHECK:	st3	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c] -// CHECK:	st3	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c] -// CHECK:	st3	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c] - -         st3 {v0.16b-v2.16b}, [x0] -         st3 {v15.8h-v17.8h}, [x15] -         st3 {v31.4s-v1.4s}, [sp] -         st3 {v0.2d-v2.2d}, [x0] -         st3 {v0.8b-v2.8b}, [x0] -         st3 {v15.4h-v17.4h}, [x15] -         st3 {v31.2s-v1.2s}, [sp] -// CHECK:	st3	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x00,0x4c] -// CHECK:	st3	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x00,0x4c] -// CHECK:	st3	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x00,0x4c] -// CHECK:	st3	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x00,0x4c] -// CHECK:	st3	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x00,0x0c] -// CHECK:	st3	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x00,0x0c] -// CHECK:	st3	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x00,0x0c] - -//------------------------------------------------------------------------------ -// Store multiple 4-element structures from four consecutive registers -//------------------------------------------------------------------------------ -         st4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] -         st4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] -         st4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] -         st4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] -         st4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] -         st4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] -         st4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] -// CHECK:	st4	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c] -// CHECK:	st4	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c] -// CHECK:	st4	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c] -// CHECK:	st4	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c] -// CHECK:	st4	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c] -// CHECK:	st4	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c] -// CHECK:	st4	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c] - -         st4 {v0.16b-v3.16b}, [x0] -         st4 {v15.8h-v18.8h}, [x15] -         st4 {v31.4s-v2.4s}, [sp] -         st4 {v0.2d-v3.2d}, [x0] -         st4 {v0.8b-v3.8b}, [x0] -         st4 {v15.4h-v18.4h}, [x15] -         st4 {v31.2s-v2.2s}, [sp] -// CHECK:	st4	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x00,0x4c] -// CHECK:	st4	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x00,0x4c] -// CHECK:	st4	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x00,0x4c] -// CHECK:	st4	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x00,0x4c] -// CHECK:	st4	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x00,0x0c] -// CHECK:	st4	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x00,0x0c] -// CHECK:	st4	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x00,0x0c] - -//------------------------------------------------------------------------------ -// Load multiple 1-element structures to one register -//------------------------------------------------------------------------------ -         ld1 {v0.16b}, [x0] -         ld1 {v15.8h}, [x15] -         ld1 {v31.4s}, [sp] -         ld1 {v0.2d}, [x0] -         ld1 {v0.8b}, [x0] -         ld1 {v15.4h}, [x15] -         ld1 {v31.2s}, [sp] -         ld1 {v0.1d}, [x0] -// CHECK:	ld1	{v0.16b}, [x0]          // encoding: [0x00,0x70,0x40,0x4c] -// CHECK:	ld1	{v15.8h}, [x15]         // encoding: [0xef,0x75,0x40,0x4c] -// CHECK:	ld1	{v31.4s}, [sp]          // encoding: [0xff,0x7b,0x40,0x4c] -// CHECK:	ld1	{v0.2d}, [x0]           // encoding: [0x00,0x7c,0x40,0x4c] -// CHECK:	ld1	{v0.8b}, [x0]           // encoding: [0x00,0x70,0x40,0x0c] -// CHECK:	ld1	{v15.4h}, [x15]         // encoding: [0xef,0x75,0x40,0x0c] -// CHECK:	ld1	{v31.2s}, [sp]          // encoding: [0xff,0x7b,0x40,0x0c] -// CHECK:	ld1	{v0.1d}, [x0]           // encoding: [0x00,0x7c,0x40,0x0c] - -//------------------------------------------------------------------------------ -// Load multiple 1-element structures to two consecutive registers -//------------------------------------------------------------------------------ -         ld1 {v0.16b, v1.16b}, [x0] -         ld1 {v15.8h, v16.8h}, [x15] -         ld1 {v31.4s, v0.4s}, [sp] -         ld1 {v0.2d, v1.2d}, [x0] -         ld1 {v0.8b, v1.8b}, [x0] -         ld1 {v15.4h, v16.4h}, [x15] -         ld1 {v31.2s, v0.2s}, [sp] -         ld1 {v0.1d, v1.1d}, [x0] -// CHECK:	ld1	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xa0,0x40,0x4c] -// CHECK:	ld1	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c] -// CHECK:	ld1	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xab,0x40,0x4c] -// CHECK:	ld1	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xac,0x40,0x4c] -// CHECK:	ld1	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xa0,0x40,0x0c] -// CHECK:	ld1	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c] -// CHECK:	ld1	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xab,0x40,0x0c] -// CHECK:	ld1	{v0.1d, v1.1d}, [x0]    // encoding: [0x00,0xac,0x40,0x0c] - -         ld1 {v0.16b-v1.16b}, [x0] -         ld1 {v15.8h-v16.8h}, [x15] -         ld1 {v31.4s-v0.4s}, [sp] -         ld1 {v0.2d-v1.2d}, [x0] -         ld1 {v0.8b-v1.8b}, [x0] -         ld1 {v15.4h-v16.4h}, [x15] -         ld1 {v31.2s-v0.2s}, [sp] -         ld1 {v0.1d-v1.1d}, [x0] -// CHECK:	ld1	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0xa0,0x40,0x4c] -// CHECK:	ld1	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0xa5,0x40,0x4c] -// CHECK:	ld1	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0xab,0x40,0x4c] -// CHECK:	ld1	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0xac,0x40,0x4c] -// CHECK:	ld1	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0xa0,0x40,0x0c] -// CHECK:	ld1	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0xa5,0x40,0x0c] -// CHECK:	ld1	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0xab,0x40,0x0c] -// CHECK:	ld1	{v0.1d, v1.1d}, [x0]    // encoding: [0x00,0xac,0x40,0x0c] - -//------------------------------------------------------------------------------ -// Load multiple 1-element structures to three consecutive registers -//------------------------------------------------------------------------------ -         ld1 {v0.16b, v1.16b, v2.16b}, [x0] -         ld1 {v15.8h, v16.8h, v17.8h}, [x15] -         ld1 {v31.4s, v0.4s, v1.4s}, [sp] -         ld1 {v0.2d, v1.2d, v2.2d}, [x0] -         ld1 {v0.8b, v1.8b, v2.8b}, [x0] -         ld1 {v15.4h, v16.4h, v17.4h}, [x15] -         ld1 {v31.2s, v0.2s, v1.2s}, [sp] -         ld1 {v0.1d, v1.1d, v2.1d}, [x0] -// CHECK:	ld1	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c] -// CHECK:	ld1	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c] -// CHECK:	ld1	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c] -// CHECK:	ld1	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c] -// CHECK:	ld1	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c] -// CHECK:	ld1	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c] -// CHECK:	ld1	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c] -// CHECK:	ld1	{v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c] - -         ld1 {v0.16b-v2.16b}, [x0] -         ld1 {v15.8h-v17.8h}, [x15] -         ld1 {v31.4s-v1.4s}, [sp] -         ld1 {v0.2d-v2.2d}, [x0] -         ld1 {v0.8b-v2.8b}, [x0] -         ld1 {v15.4h-v17.4h}, [x15] -         ld1 {v31.2s-v1.2s}, [sp] -         ld1 {v0.1d-v2.1d}, [x0] -// CHECK:	ld1	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x60,0x40,0x4c] -// CHECK:	ld1	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x65,0x40,0x4c] -// CHECK:	ld1	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x6b,0x40,0x4c] -// CHECK:	ld1	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x6c,0x40,0x4c] -// CHECK:	ld1	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x60,0x40,0x0c] -// CHECK:	ld1	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x65,0x40,0x0c] -// CHECK:	ld1	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x6b,0x40,0x0c] -// CHECK:	ld1	{v0.1d, v1.1d, v2.1d}, [x0] // encoding: [0x00,0x6c,0x40,0x0c] - -//------------------------------------------------------------------------------ -// Load multiple 1-element structures to four consecutive registers -//------------------------------------------------------------------------------ -         ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] -         ld1 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] -         ld1 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] -         ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] -         ld1 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] -         ld1 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] -         ld1 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] -         ld1 {v0.1d, v1.1d, v2.1d, v3.1d}, [x0] -// CHECK:	ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c] -// CHECK:	ld1	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c] -// CHECK:	ld1	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c] -// CHECK:	ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c] -// CHECK:	ld1	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c] -// CHECK:	ld1	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c] -// CHECK:	ld1	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c] -// CHECK:	ld1	{v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c] - -         ld1 {v0.16b-v3.16b}, [x0] -         ld1 {v15.8h-v18.8h}, [x15] -         ld1 {v31.4s-v2.4s}, [sp] -         ld1 {v0.2d-v3.2d}, [x0] -         ld1 {v0.8b-v3.8b}, [x0] -         ld1 {v15.4h-v18.4h}, [x15] -         ld1 {v31.2s-v2.2s}, [sp] -         ld1 {v0.1d-v3.1d}, [x0] -// CHECK:	ld1	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x20,0x40,0x4c] -// CHECK:	ld1	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x25,0x40,0x4c] -// CHECK:	ld1	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x2b,0x40,0x4c] -// CHECK:	ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x2c,0x40,0x4c] -// CHECK:	ld1	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x20,0x40,0x0c] -// CHECK:	ld1	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x25,0x40,0x0c] -// CHECK:	ld1	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x2b,0x40,0x0c] -// CHECK:	ld1	{v0.1d, v1.1d, v2.1d, v3.1d}, [x0] // encoding: [0x00,0x2c,0x40,0x0c] - -//------------------------------------------------------------------------------ -// Load multiple 4-element structures to two consecutive registers -//------------------------------------------------------------------------------ -         ld2 {v0.16b, v1.16b}, [x0] -         ld2 {v15.8h, v16.8h}, [x15] -         ld2 {v31.4s, v0.4s}, [sp] -         ld2 {v0.2d, v1.2d}, [x0] -         ld2 {v0.8b, v1.8b}, [x0] -         ld2 {v15.4h, v16.4h}, [x15] -         ld2 {v31.2s, v0.2s}, [sp] -// CHECK:	ld2	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0x80,0x40,0x4c] -// CHECK:	ld2	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c] -// CHECK:	ld2	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0x8b,0x40,0x4c] -// CHECK:	ld2	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0x8c,0x40,0x4c] -// CHECK:	ld2	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0x80,0x40,0x0c] -// CHECK:	ld2	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c] -// CHECK:	ld2	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0x8b,0x40,0x0c] - -         ld2 {v0.16b-v1.16b}, [x0] -         ld2 {v15.8h-v16.8h}, [x15] -         ld2 {v31.4s-v0.4s}, [sp] -         ld2 {v0.2d-v1.2d}, [x0] -         ld2 {v0.8b-v1.8b}, [x0] -         ld2 {v15.4h-v16.4h}, [x15] -         ld2 {v31.2s-v0.2s}, [sp] -// CHECK:	ld2	{v0.16b, v1.16b}, [x0]  // encoding: [0x00,0x80,0x40,0x4c] -// CHECK:	ld2	{v15.8h, v16.8h}, [x15] // encoding: [0xef,0x85,0x40,0x4c] -// CHECK:	ld2	{v31.4s, v0.4s}, [sp]   // encoding: [0xff,0x8b,0x40,0x4c] -// CHECK:	ld2	{v0.2d, v1.2d}, [x0]    // encoding: [0x00,0x8c,0x40,0x4c] -// CHECK:	ld2	{v0.8b, v1.8b}, [x0]    // encoding: [0x00,0x80,0x40,0x0c] -// CHECK:	ld2	{v15.4h, v16.4h}, [x15] // encoding: [0xef,0x85,0x40,0x0c] -// CHECK:	ld2	{v31.2s, v0.2s}, [sp]   // encoding: [0xff,0x8b,0x40,0x0c] - -//------------------------------------------------------------------------------ -// Load multiple 3-element structures to three consecutive registers -//------------------------------------------------------------------------------ -         ld3 {v0.16b, v1.16b, v2.16b}, [x0] -         ld3 {v15.8h, v16.8h, v17.8h}, [x15] -         ld3 {v31.4s, v0.4s, v1.4s}, [sp] -         ld3 {v0.2d, v1.2d, v2.2d}, [x0] -         ld3 {v0.8b, v1.8b, v2.8b}, [x0] -         ld3 {v15.4h, v16.4h, v17.4h}, [x15] -         ld3 {v31.2s, v0.2s, v1.2s}, [sp] -// CHECK:	ld3	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c] -// CHECK:	ld3	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c] -// CHECK:	ld3	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c] -// CHECK:	ld3	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c] -// CHECK:	ld3	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c] -// CHECK:	ld3	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c] -// CHECK:	ld3	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c] - -         ld3 {v0.16b-v2.16b}, [x0] -         ld3 {v15.8h-v17.8h}, [x15] -         ld3 {v31.4s-v1.4s}, [sp] -         ld3 {v0.2d-v2.2d}, [x0] -         ld3 {v0.8b-v2.8b}, [x0] -         ld3 {v15.4h-v17.4h}, [x15] -         ld3 {v31.2s-v1.2s}, [sp] -// CHECK:	ld3	{v0.16b, v1.16b, v2.16b}, [x0] // encoding: [0x00,0x40,0x40,0x4c] -// CHECK:	ld3	{v15.8h, v16.8h, v17.8h}, [x15] // encoding: [0xef,0x45,0x40,0x4c] -// CHECK:	ld3	{v31.4s, v0.4s, v1.4s}, [sp] // encoding: [0xff,0x4b,0x40,0x4c] -// CHECK:	ld3	{v0.2d, v1.2d, v2.2d}, [x0] // encoding: [0x00,0x4c,0x40,0x4c] -// CHECK:	ld3	{v0.8b, v1.8b, v2.8b}, [x0] // encoding: [0x00,0x40,0x40,0x0c] -// CHECK:	ld3	{v15.4h, v16.4h, v17.4h}, [x15] // encoding: [0xef,0x45,0x40,0x0c] -// CHECK:	ld3	{v31.2s, v0.2s, v1.2s}, [sp] // encoding: [0xff,0x4b,0x40,0x0c] - -//------------------------------------------------------------------------------ -// Load multiple 4-element structures to four consecutive registers -//------------------------------------------------------------------------------ -         ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0] -         ld4 {v15.8h, v16.8h, v17.8h, v18.8h}, [x15] -         ld4 {v31.4s, v0.4s, v1.4s, v2.4s}, [sp] -         ld4 {v0.2d, v1.2d, v2.2d, v3.2d}, [x0] -         ld4 {v0.8b, v1.8b, v2.8b, v3.8b}, [x0] -         ld4 {v15.4h, v16.4h, v17.4h, v18.4h}, [x15] -         ld4 {v31.2s, v0.2s, v1.2s, v2.2s}, [sp] -// CHECK:	ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c] -// CHECK:	ld4	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c] -// CHECK:	ld4	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c] -// CHECK:	ld4	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c] -// CHECK:	ld4	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c] -// CHECK:	ld4	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c] -// CHECK:	ld4	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c] - -         ld4 {v0.16b-v3.16b}, [x0] -         ld4 {v15.8h-v18.8h}, [x15] -         ld4 {v31.4s-v2.4s}, [sp] -         ld4 {v0.2d-v3.2d}, [x0] -         ld4 {v0.8b-v3.8b}, [x0] -         ld4 {v15.4h-v18.4h}, [x15] -         ld4 {v31.2s-v2.2s}, [sp] -// CHECK:	ld4	{v0.16b, v1.16b, v2.16b, v3.16b}, [x0] // encoding: [0x00,0x00,0x40,0x4c] -// CHECK:	ld4	{v15.8h, v16.8h, v17.8h, v18.8h}, [x15] // encoding: [0xef,0x05,0x40,0x4c] -// CHECK:	ld4	{v31.4s, v0.4s, v1.4s, v2.4s}, [sp] // encoding: [0xff,0x0b,0x40,0x4c] -// CHECK:	ld4	{v0.2d, v1.2d, v2.2d, v3.2d}, [x0] // encoding: [0x00,0x0c,0x40,0x4c] -// CHECK:	ld4	{v0.8b, v1.8b, v2.8b, v3.8b}, [x0] // encoding: [0x00,0x00,0x40,0x0c] -// CHECK:	ld4	{v15.4h, v16.4h, v17.4h, v18.4h}, [x15] // encoding: [0xef,0x05,0x40,0x0c] -// CHECK:	ld4	{v31.2s, v0.2s, v1.2s, v2.2s}, [sp] // encoding: [0xff,0x0b,0x40,0x0c] | 
