7 files changed, 59 insertions, 17 deletions
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 588570f..8e8d3ff 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -161,11 +161,11 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
 { ARM::VLD1q8PseudoWB_fixed, ARM::VLD1q8wb_fixed,true,false, false, SingleSpc,  2, 8 ,false},
 { ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true, true,SingleSpc,2,8,false},
 
-{ ARM::VLD2DUPd16Pseudo,     ARM::VLD2DUPd16,     true, false, false, SingleSpc, 2, 4,true},
+{ ARM::VLD2DUPd16Pseudo,     ARM::VLD2DUPd16,     true, false, false, SingleSpc, 2, 4,false},
 { ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, true,  SingleSpc, 2, 4,true},
-{ ARM::VLD2DUPd32Pseudo,     ARM::VLD2DUPd32,     true, false, false, SingleSpc, 2, 2,true},
+{ ARM::VLD2DUPd32Pseudo,     ARM::VLD2DUPd32,     true, false, false, SingleSpc, 2, 2,false},
 { ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, true,  SingleSpc, 2, 2,true},
-{ ARM::VLD2DUPd8Pseudo,      ARM::VLD2DUPd8,      true, false, false, SingleSpc, 2, 8,true},
+{ ARM::VLD2DUPd8Pseudo,      ARM::VLD2DUPd8,      true, false, false, SingleSpc, 2, 8,false},
 { ARM::VLD2DUPd8Pseudo_UPD,  ARM::VLD2DUPd8_UPD, true, true, true,  SingleSpc, 2, 8,true},
 
 { ARM::VLD2LNd16Pseudo,     ARM::VLD2LNd16,     true, false, false, SingleSpc,  2, 4 ,true},
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index acf1287..2a3c736 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -143,6 +143,16 @@ def VecListTwoDAllLanesAsmOperand : AsmOperandClass {
 def VecListTwoDAllLanes : RegisterOperand<DPR, "printVectorListTwoAllLanes"> {
   let ParserMatchClass = VecListTwoDAllLanesAsmOperand;
 }
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListTwoQAllLanesAsmOperand : AsmOperandClass {
+  let Name = "VecListTwoQAllLanes";
+  let ParserMethod = "parseVectorList";
+  let RenderMethod = "addVecListOperands";
+}
+def VecListTwoQAllLanes : RegisterOperand<DPR,
+                                         "printVectorListTwoSpacedAllLanes"> {
+  let ParserMatchClass = VecListTwoQAllLanesAsmOperand;
+}
 
 // Register list of one D register, with byte lane subscripting.
 def VecListOneDByteIndexAsmOperand : AsmOperandClass {
@@ -1221,27 +1231,27 @@ def VLD1DUPq16PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
 def VLD1DUPq32PseudoWB_register : VLDQWBregisterPseudo<IIC_VLD1dupu>;
 
 //   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
-class VLD2DUP<bits<4> op7_4, string Dt>
-  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
+  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
           (ins addrmode6dup:$Rn), IIC_VLD2dup,
-          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+          "vld2", Dt, "$Vd, $Rn", "", []> {
   let Rm = 0b1111;
   let Inst{4} = Rn{4};
   let DecoderMethod = "DecodeVLD2DupInstruction";
 }
 
-def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8">;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListTwoDAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListTwoDAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListTwoDAllLanes>;
 
 def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
 def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
 def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
 
 // ...with double-spaced registers (not used for codegen):
-def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8">;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListTwoQAllLanes>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListTwoQAllLanes>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListTwoQAllLanes>;
 
 // ...with address register writeback:
 class VLD2DUPWB<bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 667418c..75e273a 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1118,13 +1118,24 @@ public:
     return VectorList.Count == 2;
   }
 
+  bool isSingleSpacedVectorAllLanes() const {
+    return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced;
+  }
+  bool isDoubleSpacedVectorAllLanes() const {
+    return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced;
+  }
   bool isVecListOneDAllLanes() const {
-    if (Kind != k_VectorListAllLanes) return false;
+    if (!isSingleSpacedVectorAllLanes()) return false;
     return VectorList.Count == 1;
   }
 
   bool isVecListTwoDAllLanes() const {
-    if (Kind != k_VectorListAllLanes) return false;
+    if (!isSingleSpacedVectorAllLanes()) return false;
+    return VectorList.Count == 2;
+  }
+
+  bool isVecListTwoQAllLanes() const {
+    if (!isDoubleSpacedVectorAllLanes()) return false;
     return VectorList.Count == 2;
   }
 
@@ -2041,10 +2052,12 @@ public:
   }
 
   static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+                                              bool isDoubleSpaced,
                                               SMLoc S, SMLoc E) {
     ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
     Op->VectorList.RegNum = RegNum;
     Op->VectorList.Count = Count;
+    Op->VectorList.isDoubleSpaced = isDoubleSpaced;
     Op->StartLoc = S;
     Op->EndLoc = E;
     return Op;
@@ -2865,7 +2878,8 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
         break;
       case AllLanes:
         E = Parser.getTok().getLoc();
-        Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, S, E));
+        Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
+                                                                S, E));
         break;
       case IndexedLane:
         Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
@@ -2889,7 +2903,8 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
         break;
       case AllLanes:
         E = Parser.getTok().getLoc();
-        Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, S, E));
+        Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
+                                                                S, E));
         break;
       case IndexedLane:
         Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
@@ -3060,6 +3075,7 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
     break;
   case AllLanes:
     Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+                                                            (Spacing == 2),
                                                             S, E));
     break;
   case IndexedLane:
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index f92bc55..641836a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1066,3 +1066,13 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
     << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
 }
 
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
+                                                      unsigned OpNum,
+                                                      raw_ostream &O) {
+  // Normally, it's not safe to use register enum values directly with
+  // addition to get the next register, but for VFP registers, the
+  // sort order is guaranteed because they're all of the form D<n>.
+  O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+    << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+}
+
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 05db2d2..e5d1367 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -139,6 +139,8 @@ public:
                                   raw_ostream &O);
   void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
                                 raw_ostream &O);
+  void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+                                        raw_ostream &O);
 };
 
 } // end namespace llvm
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 6e6dda6..d61fb77 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -236,6 +236,8 @@
         vld2.8 {d2[4], d3[4]}, [r2], r3
         vld2.8 {d2[4], d3[4]}, [r2]!
         vld2.8 {d2[4], d3[4]}, [r2]
+        vld2.32 {d22[], d23[]}, [r1]
+        vld2.32 {d22[], d24[]}, [r1]
 
 @ CHECK: vld2.8	{d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
 @ CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
@@ -246,7 +248,8 @@
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0xa2,0xf4]
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2]!   @ encoding: [0x8d,0x21,0xa2,0xf4]
 @ CHECK: vld2.8	{d2[4], d3[4]}, [r2]    @ encoding: [0x8f,0x21,0xa2,0xf4]
-
+@ CHECK: vld2.32 {d22[], d23[]}, [r1]    @ encoding: [0x8f,0x6d,0xe1,0xf4]
+@ CHECK: vld2.32 {d22[], d24[]}, [r1]    @ encoding: [0xaf,0x6d,0xe1,0xf4]
 
 
 @	vld3.8	{d16[1], d17[1], d18[1]}, [r0]
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index 6c3cae2..4a12206 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -578,6 +578,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
   REG("VecListTwoQ");
   REG("VecListOneDAllLanes");
   REG("VecListTwoDAllLanes");
+  REG("VecListTwoQAllLanes");
 
   IMM("i32imm");
   IMM("i32imm_hilo16");