13 files changed, 284 insertions, 86 deletions
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 5f18c4e..9a9bb99 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -236,6 +236,13 @@ public:
     virtual void NodeUpdated(SDNode *N);
   };
 
+  /// NewNodesMustHaveLegalTypes - When true, additional steps are taken to
+  /// ensure that getConstant() and similar functions return DAG nodes that
+  /// have legal types. This is important after type legalization since
+  /// any illegally typed nodes generated after this point will not experience
+  /// type legalization.
+  bool NewNodesMustHaveLegalTypes;
+
 private:
   /// DAGUpdateListener is a friend so it can manipulate the listener stack.
   friend struct DAGUpdateListener;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index fb0e63e..bac06cc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -871,7 +871,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
 SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
   : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), OptLevel(OL),
     EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
-    Root(getEntryNode()), UpdateListeners(0) {
+    Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
+    UpdateListeners(0) {
   AllNodes.push_back(&EntryNode);
   DbgInfo = new SDDbgInfo();
 }
@@ -983,6 +984,54 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
    APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
    Elt = ConstantInt::get(*getContext(), NewVal);
   }
+  // In other cases the element type is illegal and needs to be expanded, for
+  // example v2i64 on MIPS32. In this case, find the nearest legal type, split
+  // the value into n parts and use a vector type with n-times the elements.
+  // Then bitcast to the type requested.
+  // Legalizing constants too early makes the DAGCombiner's job harder so we
+  // only legalize if the DAG tells us we must produce legal types.
+  else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
+           TLI->getTypeAction(*getContext(), EltVT) ==
+           TargetLowering::TypeExpandInteger) {
+    APInt NewVal = Elt->getValue();
+    EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+    unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+    unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
+    EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
+
+    // Check the temporary vector is the correct size. If this fails then
+    // getTypeToTransformTo() probably returned a type whose size (in bits)
+    // isn't a power-of-2 factor of the requested type size.
+    assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
+
+    SmallVector<SDValue, 2> EltParts;
+    for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
+      EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
+                                           .trunc(ViaEltSizeInBits),
+                                     ViaEltVT, isT));
+    }
+
+    // EltParts is currently in little endian order. If we actually want
+    // big-endian order then reverse it now.
+    if (TLI->isBigEndian())
+      std::reverse(EltParts.begin(), EltParts.end());
+
+    // The elements must be reversed when the element order is different
+    // to the endianness of the elements (because the BITCAST is itself a
+    // vector shuffle in this situation). However, we do not need any code to
+    // perform this reversal because getConstant() is producing a vector
+    // splat.
+    // This situation occurs in MIPS MSA.
+
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+      Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
+
+    SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT,
+                             getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT,
+                                     &Ops[0], Ops.size()));
+    return Result;
+  }
 
   assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
          "APInt size does not match type size!");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c937996..8bf8756 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -666,6 +666,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
           << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
   }
 
+  CurDAG->NewNodesMustHaveLegalTypes = true;
+
   if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
 
   {
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 9585efb..e2ea629 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1254,38 +1254,73 @@ static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
   return Result;
 }
 
-static SDValue
-lowerMSASplatImm(SDLoc DL, EVT ResTy, SDValue ImmOp, SelectionDAG &DAG) {
-  EVT ViaVecTy = ResTy;
-  SmallVector<SDValue, 16> Ops;
-  SDValue ImmHiOp;
-
-  if (ViaVecTy == MVT::v2i64) {
-    ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp,
-                          DAG.getConstant(31, MVT::i32));
-    for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) {
-      Ops.push_back(ImmHiOp);
-      Ops.push_back(ImmOp);
-    }
+static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
+  EVT ResVecTy = Op->getValueType(0);
+  EVT ViaVecTy = ResVecTy;
+  SDLoc DL(Op);
+
+  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
+  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
+  // lanes.
+  SDValue LaneA;
+  SDValue LaneB = Op->getOperand(2);
+
+  if (ResVecTy == MVT::v2i64) {
+    LaneA = DAG.getConstant(0, MVT::i32);
     ViaVecTy = MVT::v4i32;
-  } else {
-    for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i)
-      Ops.push_back(ImmOp);
-  }
+  } else
+    LaneA = LaneB;
 
-  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, &Ops[0],
-                               Ops.size());
+  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
+                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
 
-  if (ResTy != ViaVecTy)
-    Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result);
+  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
+                               ViaVecTy.getVectorNumElements());
+
+  if (ViaVecTy != ResVecTy)
+    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);
 
   return Result;
 }
 
-static SDValue
-lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
-  return lowerMSASplatImm(SDLoc(Op), Op->getValueType(0),
-                          Op->getOperand(ImmOp), DAG);
+static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
+  return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0));
+}
+
+static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
+                                   bool BigEndian, SelectionDAG &DAG) {
+  EVT ViaVecTy = VecTy;
+  SDValue SplatValueA = SplatValue;
+  SDValue SplatValueB = SplatValue;
+  SDLoc DL(SplatValue);
+
+  if (VecTy == MVT::v2i64) {
+    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
+    ViaVecTy = MVT::v4i32;
+
+    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
+    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
+                              DAG.getConstant(32, MVT::i32));
+    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
+  }
+
+  // We currently hold the parts in little endian order. Swap them if
+  // necessary.
+  if (BigEndian)
+    std::swap(SplatValueA, SplatValueB);
+
+  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };
+
+  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
+                               ViaVecTy.getVectorNumElements());
+
+  if (VecTy != ViaVecTy)
+    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
+
+  return Result;
 }
 
 static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
@@ -1295,27 +1330,37 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
   SDValue Exp2Imm;
   SDLoc DL(Op);
 
-  // The DAG Combiner can't constant fold bitcasted vectors so we must do it
-  // here.
+  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
+  // here for now.
   if (VecTy == MVT::v2i64) {
     if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
       APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
 
       SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32);
-      SDValue BitImmOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);
-      Exp2Imm = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
-                            DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32,
-                                        BitImmHiOp, BitImmOp,
-                                        BitImmHiOp, BitImmOp));
+      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);
+
+      if (BigEndian)
+        std::swap(BitImmLoOp, BitImmHiOp);
+
+      Exp2Imm =
+          DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+                      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, BitImmLoOp,
+                                  BitImmHiOp, BitImmLoOp, BitImmHiOp));
     }
   }
 
   if (Exp2Imm.getNode() == NULL) {
     // We couldnt constant fold, do a vector shift instead
-    SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32),
-                                   DAG);
-    Exp2Imm = lowerMSASplatImm(DL, VecTy, Imm, DAG);
-    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, One, Exp2Imm);
+
+    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
+    // only values 0-63 are valid.
+    if (VecTy == MVT::v2i64)
+      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);
+
+    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
+
+    Exp2Imm =
+        DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm);
   }
 
   return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
@@ -1325,7 +1370,7 @@ static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
   EVT ResTy = Op->getValueType(0);
   EVT ViaVecTy = ResTy == MVT::v2i64 ? MVT::v4i32 : ResTy;
   SDLoc DL(Op);
-  SDValue One = lowerMSASplatImm(DL, ResTy, DAG.getConstant(1, MVT::i32), DAG);
+  SDValue One = DAG.getConstant(1, ResTy);
   SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
 
   SDValue AllOnes = DAG.getConstant(-1, MVT::i32);
@@ -1346,15 +1391,9 @@ static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
   SDLoc DL(Op);
   EVT ResTy = Op->getValueType(0);
-  SDValue SHAmount = Op->getOperand(2);
-  EVT ImmTy = SHAmount->getValueType(0);
-  SDValue Bit =
-      DAG.getNode(ISD::SHL, DL, ImmTy, DAG.getConstant(1, ImmTy), SHAmount);
-  SDValue BitMask = DAG.getNOT(DL, Bit, ImmTy);
-
-  assert(ResTy.getVectorNumElements() <= 16);
-
-  BitMask = lowerMSASplatImm(DL, ResTy, BitMask, DAG);
+  APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
+                 << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
+  SDValue BitMask = DAG.getConstant(~BitImm, ResTy);
 
   return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
 }
@@ -1469,8 +1508,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_bneg_w:
   case Intrinsic::mips_bneg_d: {
     EVT VecTy = Op->getValueType(0);
-    SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32),
-                                   DAG);
+    SDValue One = DAG.getConstant(1, VecTy);
 
     return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
                        DAG.getNode(ISD::SHL, DL, VecTy, One,
@@ -1504,8 +1542,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_bset_w:
   case Intrinsic::mips_bset_d: {
     EVT VecTy = Op->getValueType(0);
-    SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32),
-                                   DAG);
+    SDValue One = DAG.getConstant(1, VecTy);
 
     return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
                        DAG.getNode(ISD::SHL, DL, VecTy, One,
@@ -1926,7 +1963,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
     // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
     // Instead we lower to MipsISD::VSHF and match from there.
     return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
-                       lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
+                       lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
                        Op->getOperand(1));
   case Intrinsic::mips_splati_b:
   case Intrinsic::mips_splati_h:
@@ -2200,15 +2237,10 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
       return SDValue();
     }
 
-    SmallVector<SDValue, 16> Ops;
-    SDValue Constant = DAG.getConstant(SplatValue.sextOrSelf(32), MVT::i32);
-
-    for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i)
-      Ops.push_back(Constant);
-
-    SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Node), ViaVecTy,
-                                 &Ops[0], Ops.size());
+    // SelectionDAG::getConstant will promote SplatValue appropriately.
+    SDValue Result = DAG.getConstant(SplatValue, ViaVecTy);
 
+    // Bitcast to the type we originally wanted
     if (ViaVecTy != ResTy)
       Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
 
diff --git a/test/CodeGen/Mips/msa/3r-b.ll b/test/CodeGen/Mips/msa/3r-b.ll
index 7c45b2b..a05d19b 100644
--- a/test/CodeGen/Mips/msa/3r-b.ll
+++ b/test/CodeGen/Mips/msa/3r-b.ll
@@ -3,7 +3,6 @@
 
 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
-; XFAIL: *
 
 @llvm_mips_bclr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_bclr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
diff --git a/test/CodeGen/Mips/msa/bit.ll b/test/CodeGen/Mips/msa/bit.ll
index dc8bb8f..59ddbe1 100644
--- a/test/CodeGen/Mips/msa/bit.ll
+++ b/test/CodeGen/Mips/msa/bit.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
 ; Test the MSA intrinsics that are encoded with the BIT instruction format.
 
 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
diff --git a/test/CodeGen/Mips/msa/elm_shift_slide.ll b/test/CodeGen/Mips/msa/elm_shift_slide.ll
index 3b3504b..39d670d 100644
--- a/test/CodeGen/Mips/msa/elm_shift_slide.ll
+++ b/test/CodeGen/Mips/msa/elm_shift_slide.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
 ; Test the MSA intrinsics that are encoded with the ELM instruction format and
 ; are either shifts or slides.
 
diff --git a/test/CodeGen/Mips/msa/i5-a.ll b/test/CodeGen/Mips/msa/i5-a.ll
index f45df07..0b50720 100644
--- a/test/CodeGen/Mips/msa/i5-a.ll
+++ b/test/CodeGen/Mips/msa/i5-a.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
 ; Test the MSA intrinsics that are encoded with the I5 instruction format.
 ; There are lots of these so this covers those beginning with 'a'
 
diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll
index f4477a0..da6be66 100644
--- a/test/CodeGen/Mips/msa/i5-b.ll
+++ b/test/CodeGen/Mips/msa/i5-b.ll
@@ -3,7 +3,6 @@
 
 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
-; XFAIL: *
 
 @llvm_mips_bclri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
 @llvm_mips_bclri_b_RES  = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
diff --git a/test/CodeGen/Mips/msa/i5-c.ll b/test/CodeGen/Mips/msa/i5-c.ll
index d63b3cc..bf1578f 100644
--- a/test/CodeGen/Mips/msa/i5-c.ll
+++ b/test/CodeGen/Mips/msa/i5-c.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
 ; Test the MSA intrinsics that are encoded with the I5 instruction format.
 ; There are lots of these so this covers those beginning with 'c'
 
diff --git a/test/CodeGen/Mips/msa/i5-m.ll b/test/CodeGen/Mips/msa/i5-m.ll
index 74e698b..2766349 100644
--- a/test/CodeGen/Mips/msa/i5-m.ll
+++ b/test/CodeGen/Mips/msa/i5-m.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
 ; Test the MSA intrinsics that are encoded with the I5 instruction format.
 ; There are lots of these so this covers those beginning with 'm'
 
diff --git a/test/CodeGen/Mips/msa/i5-s.ll b/test/CodeGen/Mips/msa/i5-s.ll
index 60ba8e1..184172f 100644
--- a/test/CodeGen/Mips/msa/i5-s.ll
+++ b/test/CodeGen/Mips/msa/i5-s.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
 ; Test the MSA intrinsics that are encoded with the I5 instruction format.
 ; There are lots of these so this covers those beginning with 's'
 
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
new file mode 100644
index 0000000..4beaaa9
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
@@ -0,0 +1,134 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; "Unexpected illegal type!" assertion.
+; It should at least successfully build.
+
+define void @autogen_SD1704963983(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+  %A4 = alloca <4 x double>
+  %A3 = alloca <8 x i64>
+  %A2 = alloca <1 x double>
+  %A1 = alloca double
+  %A = alloca i32
+  %L = load i8* %0
+  store i8 77, i8* %0
+  %E = extractelement <8 x i64> zeroinitializer, i32 2
+  %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15, i32 1, i32 3>
+  %I = insertelement <8 x i64> zeroinitializer, i64 %E, i32 7
+  %Sl = select i1 false, i8* %0, i8* %0
+  %Cmp = icmp eq i32 434069, 272505
+  br label %CF
+
+CF:                                               ; preds = %CF, %CF78, %BB
+  %L5 = load i8* %Sl
+  store i8 %L, i8* %Sl
+  %E6 = extractelement <8 x i32> zeroinitializer, i32 2
+  %Shuff7 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef>
+  %I8 = insertelement <8 x i64> zeroinitializer, i64 %4, i32 7
+  %B = shl <1 x i16> zeroinitializer, zeroinitializer
+  %FC = sitofp <8 x i64> zeroinitializer to <8 x float>
+  %Sl9 = select i1 %Cmp, i8 77, i8 77
+  %Cmp10 = icmp uge <8 x i64> %Shuff, zeroinitializer
+  %L11 = load i8* %0
+  store i8 %Sl9, i8* %0
+  %E12 = extractelement <1 x i16> zeroinitializer, i32 0
+  %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 undef, i32 3, i32 5, i32 7>
+  %I14 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 3
+  %B15 = udiv <1 x i16> %B, zeroinitializer
+  %Tr = trunc <8 x i64> %Shuff to <8 x i32>
+  %Sl16 = select i1 %Cmp, i8 77, i8 %5
+  %Cmp17 = icmp ult <8 x i1> %Cmp10, %Cmp10
+  %L18 = load i8* %Sl
+  store i8 -1, i8* %Sl
+  %E19 = extractelement <8 x i32> zeroinitializer, i32 3
+  %Shuff20 = shufflevector <8 x float> %FC, <8 x float> %FC, <8 x i32> <i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2, i32 undef>
+  %I21 = insertelement <8 x i64> %Shuff13, i64 %E, i32 0
+  %B22 = urem <8 x i64> %Shuff7, %I21
+  %FC23 = sitofp i32 50347 to float
+  %Sl24 = select i1 %Cmp, double 0.000000e+00, double 0.000000e+00
+  %Cmp25 = icmp ugt i32 465489, 47533
+  br i1 %Cmp25, label %CF, label %CF78
+
+CF78:                                             ; preds = %CF
+  %L26 = load i8* %Sl
+  store i32 50347, i32* %A
+  %E27 = extractelement <8 x i1> %Cmp10, i32 2
+  br i1 %E27, label %CF, label %CF77
+
+CF77:                                             ; preds = %CF77, %CF81, %CF78
+  %Shuff28 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef>
+  %I29 = insertelement <1 x i16> zeroinitializer, i16 -1, i32 0
+  %B30 = urem <8 x i32> %Tr, zeroinitializer
+  %Tr31 = trunc i32 0 to i16
+  %Sl32 = select i1 %Cmp, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer
+  %L33 = load i8* %Sl
+  store i8 %L26, i8* %Sl
+  %E34 = extractelement <4 x i32> zeroinitializer, i32 0
+  %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> %B, <1 x i32> undef
+  %I36 = insertelement <8 x i64> %Shuff28, i64 %E, i32 7
+  %B37 = srem <1 x i16> %I29, zeroinitializer
+  %FC38 = sitofp <8 x i32> %B30 to <8 x double>
+  %Sl39 = select i1 %Cmp, double 0.000000e+00, double %Sl24
+  %L40 = load i8* %Sl
+  store i8 %Sl16, i8* %Sl
+  %E41 = extractelement <1 x i16> zeroinitializer, i32 0
+  %Shuff42 = shufflevector <8 x i1> %Cmp17, <8 x i1> %Cmp10, <8 x i32> <i32 14, i32 undef, i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12>
+  %I43 = insertelement <4 x i32> zeroinitializer, i32 272505, i32 0
+  %B44 = urem <8 x i32> %B30, %Tr
+  %PC = bitcast i8* %0 to i64*
+  %Sl45 = select i1 %Cmp, <8 x i1> %Cmp10, <8 x i1> %Shuff42
+  %Cmp46 = fcmp ugt float 0xB856238A00000000, 0x47DA795E40000000
+  br i1 %Cmp46, label %CF77, label %CF80
+
+CF80:                                             ; preds = %CF80, %CF77
+  %L47 = load i64* %PC
+  store i8 77, i8* %Sl
+  %E48 = extractelement <8 x i64> zeroinitializer, i32 2
+  %Shuff49 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff7, <8 x i32> <i32 5, i32 7, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 3>
+  %I50 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7
+  %B51 = fdiv float 0x46CC2D8000000000, %FC23
+  %PC52 = bitcast <8 x i64>* %A3 to i64*
+  %Sl53 = select i1 %Cmp, <8 x i64> %Shuff, <8 x i64> %Shuff
+  %Cmp54 = fcmp ole float 0x47DA795E40000000, 0xB856238A00000000
+  br i1 %Cmp54, label %CF80, label %CF81
+
+CF81:                                             ; preds = %CF80
+  %L55 = load i8* %Sl
+  store i8 %Sl16, i8* %Sl
+  %E56 = extractelement <1 x i16> %B, i32 0
+  %Shuff57 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> <i32 1>
+  %I58 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7
+  %B59 = srem i32 %E19, %E19
+  %Sl60 = select i1 %Cmp, i8 77, i8 77
+  %Cmp61 = icmp ult <1 x i16> zeroinitializer, %B
+  %L62 = load i8* %Sl
+  store i64 %L47, i64* %PC52
+  %E63 = extractelement <4 x i32> %I43, i32 2
+  %Shuff64 = shufflevector <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 1, i32 3>
+  %I65 = insertelement <8 x i64> %B22, i64 %L47, i32 7
+  %B66 = add <8 x i64> %I50, %I65
+  %FC67 = uitofp i16 %E12 to float
+  %Sl68 = select i1 %Cmp, <8 x i32> %B30, <8 x i32> zeroinitializer
+  %Cmp69 = fcmp ord double 0.000000e+00, 0.000000e+00
+  br i1 %Cmp69, label %CF77, label %CF79
+
+CF79:                                             ; preds = %CF81
+  %L70 = load i32* %A
+  store i64 %4, i64* %PC
+  %E71 = extractelement <4 x i32> zeroinitializer, i32 0
+  %Shuff72 = shufflevector <8 x i32> zeroinitializer, <8 x i32> %B44, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 undef, i32 7, i32 9>
+  %I73 = insertelement <8 x i16> zeroinitializer, i16 %E12, i32 5
+  %B74 = fsub double 0.000000e+00, 0.000000e+00
+  %Sl75 = select i1 %Cmp46, i32 %E6, i32 %E19
+  %Cmp76 = icmp ugt <4 x i32> %I43, zeroinitializer
+  store i8 %L, i8* %Sl
+  store i64 %L47, i64* %PC
+  store i64 %L47, i64* %PC
+  store i8 %L5, i8* %Sl
+  store i8 %L5, i8* %0
+  ret void
+}