aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h7
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp51
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp2
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp150
-rw-r--r--test/CodeGen/Mips/msa/3r-b.ll1
-rw-r--r--test/CodeGen/Mips/msa/bit.ll4
-rw-r--r--test/CodeGen/Mips/msa/elm_shift_slide.ll4
-rw-r--r--test/CodeGen/Mips/msa/i5-a.ll4
-rw-r--r--test/CodeGen/Mips/msa/i5-b.ll1
-rw-r--r--test/CodeGen/Mips/msa/i5-c.ll4
-rw-r--r--test/CodeGen/Mips/msa/i5-m.ll4
-rw-r--r--test/CodeGen/Mips/msa/i5-s.ll4
-rw-r--r--test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll134
13 files changed, 284 insertions, 86 deletions
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 5f18c4e..9a9bb99 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -236,6 +236,13 @@ public:
virtual void NodeUpdated(SDNode *N);
};
+ /// NewNodesMustHaveLegalTypes - When true, additional steps are taken to
+ /// ensure that getConstant() and similar functions return DAG nodes that
+ /// have legal types. This is important after type legalization since
+ /// any illegally typed nodes generated after this point will not experience
+ /// type legalization.
+ bool NewNodesMustHaveLegalTypes;
+
private:
/// DAGUpdateListener is a friend so it can manipulate the listener stack.
friend struct DAGUpdateListener;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index fb0e63e..bac06cc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -871,7 +871,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), UpdateListeners(0) {
+ Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
+ UpdateListeners(0) {
AllNodes.push_back(&EntryNode);
DbgInfo = new SDDbgInfo();
}
@@ -983,6 +984,54 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
+ // In other cases the element type is illegal and needs to be expanded, for
+ // example v2i64 on MIPS32. In this case, find the nearest legal type, split
+ // the value into n parts and use a vector type with n-times the elements.
+ // Then bitcast to the type requested.
+ // Legalizing constants too early makes the DAGCombiner's job harder so we
+ // only legalize if the DAG tells us we must produce legal types.
+ else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
+ TLI->getTypeAction(*getContext(), EltVT) ==
+ TargetLowering::TypeExpandInteger) {
+ APInt NewVal = Elt->getValue();
+ EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
+ unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();
+ unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
+ EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
+
+ // Check the temporary vector is the correct size. If this fails then
+ // getTypeToTransformTo() probably returned a type whose size (in bits)
+ // isn't a power-of-2 factor of the requested type size.
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ SmallVector<SDValue, 2> EltParts;
+ for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) {
+ EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits)
+ .trunc(ViaEltSizeInBits),
+ ViaEltVT, isT));
+ }
+
+ // EltParts is currently in little endian order. If we actually want
+ // big-endian order then reverse it now.
+ if (TLI->isBigEndian())
+ std::reverse(EltParts.begin(), EltParts.end());
+
+ // The elements must be reversed when the element order is different
+ // to the endianness of the elements (because the BITCAST is itself a
+ // vector shuffle in this situation). However, we do not need any code to
+ // perform this reversal because getConstant() is producing a vector
+ // splat.
+ // This situation occurs in MIPS MSA.
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+ Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
+
+ SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT,
+ getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT,
+ &Ops[0], Ops.size()));
+ return Result;
+ }
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c937996..8bf8756 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -666,6 +666,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
<< BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
}
+ CurDAG->NewNodesMustHaveLegalTypes = true;
+
if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
{
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 9585efb..e2ea629 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1254,38 +1254,73 @@ static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
return Result;
}
-static SDValue
-lowerMSASplatImm(SDLoc DL, EVT ResTy, SDValue ImmOp, SelectionDAG &DAG) {
- EVT ViaVecTy = ResTy;
- SmallVector<SDValue, 16> Ops;
- SDValue ImmHiOp;
-
- if (ViaVecTy == MVT::v2i64) {
- ImmHiOp = DAG.getNode(ISD::SRA, DL, MVT::i32, ImmOp,
- DAG.getConstant(31, MVT::i32));
- for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i) {
- Ops.push_back(ImmHiOp);
- Ops.push_back(ImmOp);
- }
+static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
+ EVT ResVecTy = Op->getValueType(0);
+ EVT ViaVecTy = ResVecTy;
+ SDLoc DL(Op);
+
+ // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
+ // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
+ // lanes.
+ SDValue LaneA;
+ SDValue LaneB = Op->getOperand(2);
+
+ if (ResVecTy == MVT::v2i64) {
+ LaneA = DAG.getConstant(0, MVT::i32);
ViaVecTy = MVT::v4i32;
- } else {
- for (unsigned i = 0; i < ResTy.getVectorNumElements(); ++i)
- Ops.push_back(ImmOp);
- }
+ } else
+ LaneA = LaneB;
- SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, &Ops[0],
- Ops.size());
+ SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
+ LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
- if (ResTy != ViaVecTy)
- Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result);
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
+ ViaVecTy.getVectorNumElements());
+
+ if (ViaVecTy != ResVecTy)
+ Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);
return Result;
}
-static SDValue
-lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
- return lowerMSASplatImm(SDLoc(Op), Op->getValueType(0),
- Op->getOperand(ImmOp), DAG);
+static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG) {
+ return DAG.getConstant(Op->getConstantOperandVal(ImmOp), Op->getValueType(0));
+}
+
+static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
+ bool BigEndian, SelectionDAG &DAG) {
+ EVT ViaVecTy = VecTy;
+ SDValue SplatValueA = SplatValue;
+ SDValue SplatValueB = SplatValue;
+ SDLoc DL(SplatValue);
+
+ if (VecTy == MVT::v2i64) {
+ // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
+ ViaVecTy = MVT::v4i32;
+
+ SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
+ SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
+ DAG.getConstant(32, MVT::i32));
+ SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
+ }
+
+ // We currently hold the parts in little endian order. Swap them if
+ // necessary.
+ if (BigEndian)
+ std::swap(SplatValueA, SplatValueB);
+
+ SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB,
+ SplatValueA, SplatValueB, SplatValueA, SplatValueB };
+
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
+ ViaVecTy.getVectorNumElements());
+
+ if (VecTy != ViaVecTy)
+ Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
+
+ return Result;
}
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
@@ -1295,27 +1330,37 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
SDValue Exp2Imm;
SDLoc DL(Op);
- // The DAG Combiner can't constant fold bitcasted vectors so we must do it
- // here.
+ // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
+ // here for now.
if (VecTy == MVT::v2i64) {
if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), MVT::i32);
- SDValue BitImmOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);
- Exp2Imm = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32,
- BitImmHiOp, BitImmOp,
- BitImmHiOp, BitImmOp));
+ SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), MVT::i32);
+
+ if (BigEndian)
+ std::swap(BitImmLoOp, BitImmHiOp);
+
+ Exp2Imm =
+ DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, BitImmLoOp,
+ BitImmHiOp, BitImmLoOp, BitImmHiOp));
}
}
if (Exp2Imm.getNode() == NULL) {
// We couldnt constant fold, do a vector shift instead
- SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32),
- DAG);
- Exp2Imm = lowerMSASplatImm(DL, VecTy, Imm, DAG);
- Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, One, Exp2Imm);
+
+ // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
+ // only values 0-63 are valid.
+ if (VecTy == MVT::v2i64)
+ Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);
+
+ Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
+
+ Exp2Imm =
+ DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, VecTy), Exp2Imm);
}
return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
@@ -1325,7 +1370,7 @@ static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
EVT ResTy = Op->getValueType(0);
EVT ViaVecTy = ResTy == MVT::v2i64 ? MVT::v4i32 : ResTy;
SDLoc DL(Op);
- SDValue One = lowerMSASplatImm(DL, ResTy, DAG.getConstant(1, MVT::i32), DAG);
+ SDValue One = DAG.getConstant(1, ResTy);
SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
SDValue AllOnes = DAG.getConstant(-1, MVT::i32);
@@ -1346,15 +1391,9 @@ static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT ResTy = Op->getValueType(0);
- SDValue SHAmount = Op->getOperand(2);
- EVT ImmTy = SHAmount->getValueType(0);
- SDValue Bit =
- DAG.getNode(ISD::SHL, DL, ImmTy, DAG.getConstant(1, ImmTy), SHAmount);
- SDValue BitMask = DAG.getNOT(DL, Bit, ImmTy);
-
- assert(ResTy.getVectorNumElements() <= 16);
-
- BitMask = lowerMSASplatImm(DL, ResTy, BitMask, DAG);
+ APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
+ << cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
+ SDValue BitMask = DAG.getConstant(~BitImm, ResTy);
return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}
@@ -1469,8 +1508,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_bneg_w:
case Intrinsic::mips_bneg_d: {
EVT VecTy = Op->getValueType(0);
- SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32),
- DAG);
+ SDValue One = DAG.getConstant(1, VecTy);
return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
DAG.getNode(ISD::SHL, DL, VecTy, One,
@@ -1504,8 +1542,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_bset_w:
case Intrinsic::mips_bset_d: {
EVT VecTy = Op->getValueType(0);
- SDValue One = lowerMSASplatImm(DL, VecTy, DAG.getConstant(1, MVT::i32),
- DAG);
+ SDValue One = DAG.getConstant(1, VecTy);
return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
DAG.getNode(ISD::SHL, DL, VecTy, One,
@@ -1926,7 +1963,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
// EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
// Instead we lower to MipsISD::VSHF and match from there.
return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
- lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
+ lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
Op->getOperand(1));
case Intrinsic::mips_splati_b:
case Intrinsic::mips_splati_h:
@@ -2200,15 +2237,10 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
return SDValue();
}
- SmallVector<SDValue, 16> Ops;
- SDValue Constant = DAG.getConstant(SplatValue.sextOrSelf(32), MVT::i32);
-
- for (unsigned i = 0; i < ViaVecTy.getVectorNumElements(); ++i)
- Ops.push_back(Constant);
-
- SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Node), ViaVecTy,
- &Ops[0], Ops.size());
+ // SelectionDAG::getConstant will promote SplatValue appropriately.
+ SDValue Result = DAG.getConstant(SplatValue, ViaVecTy);
+ // Bitcast to the type we originally wanted
if (ViaVecTy != ResTy)
Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
diff --git a/test/CodeGen/Mips/msa/3r-b.ll b/test/CodeGen/Mips/msa/3r-b.ll
index 7c45b2b..a05d19b 100644
--- a/test/CodeGen/Mips/msa/3r-b.ll
+++ b/test/CodeGen/Mips/msa/3r-b.ll
@@ -3,7 +3,6 @@
; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
-; XFAIL: *
@llvm_mips_bclr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bclr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
diff --git a/test/CodeGen/Mips/msa/bit.ll b/test/CodeGen/Mips/msa/bit.ll
index dc8bb8f..59ddbe1 100644
--- a/test/CodeGen/Mips/msa/bit.ll
+++ b/test/CodeGen/Mips/msa/bit.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
; Test the MSA intrinsics that are encoded with the BIT instruction format.
; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
diff --git a/test/CodeGen/Mips/msa/elm_shift_slide.ll b/test/CodeGen/Mips/msa/elm_shift_slide.ll
index 3b3504b..39d670d 100644
--- a/test/CodeGen/Mips/msa/elm_shift_slide.ll
+++ b/test/CodeGen/Mips/msa/elm_shift_slide.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
; Test the MSA intrinsics that are encoded with the ELM instruction format and
; are either shifts or slides.
diff --git a/test/CodeGen/Mips/msa/i5-a.ll b/test/CodeGen/Mips/msa/i5-a.ll
index f45df07..0b50720 100644
--- a/test/CodeGen/Mips/msa/i5-a.ll
+++ b/test/CodeGen/Mips/msa/i5-a.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
; Test the MSA intrinsics that are encoded with the I5 instruction format.
; There are lots of these so this covers those beginning with 'a'
diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll
index f4477a0..da6be66 100644
--- a/test/CodeGen/Mips/msa/i5-b.ll
+++ b/test/CodeGen/Mips/msa/i5-b.ll
@@ -3,7 +3,6 @@
; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
-; XFAIL: *
@llvm_mips_bclri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bclri_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
diff --git a/test/CodeGen/Mips/msa/i5-c.ll b/test/CodeGen/Mips/msa/i5-c.ll
index d63b3cc..bf1578f 100644
--- a/test/CodeGen/Mips/msa/i5-c.ll
+++ b/test/CodeGen/Mips/msa/i5-c.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
; Test the MSA intrinsics that are encoded with the I5 instruction format.
; There are lots of these so this covers those beginning with 'c'
diff --git a/test/CodeGen/Mips/msa/i5-m.ll b/test/CodeGen/Mips/msa/i5-m.ll
index 74e698b..2766349 100644
--- a/test/CodeGen/Mips/msa/i5-m.ll
+++ b/test/CodeGen/Mips/msa/i5-m.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
; Test the MSA intrinsics that are encoded with the I5 instruction format.
; There are lots of these so this covers those beginning with 'm'
diff --git a/test/CodeGen/Mips/msa/i5-s.ll b/test/CodeGen/Mips/msa/i5-s.ll
index 60ba8e1..184172f 100644
--- a/test/CodeGen/Mips/msa/i5-s.ll
+++ b/test/CodeGen/Mips/msa/i5-s.ll
@@ -1,7 +1,3 @@
-; Both endians should emit the same output for immediate instructions.
-; This is not currently true.
-; XFAIL: *
-
; Test the MSA intrinsics that are encoded with the I5 instruction format.
; There are lots of these so this covers those beginning with 's'
diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
new file mode 100644
index 0000000..4beaaa9
--- /dev/null
+++ b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
@@ -0,0 +1,134 @@
+; RUN: llc -march=mips < %s
+; RUN: llc -march=mips -mattr=+msa,+fp64 < %s
+; RUN: llc -march=mipsel < %s
+; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s
+
+; This test originally failed for MSA with a
+; "Unexpected illegal type!" assertion.
+; It should at least successfully build.
+
+define void @autogen_SD1704963983(i8*, i32*, i64*, i32, i64, i8) {
+BB:
+ %A4 = alloca <4 x double>
+ %A3 = alloca <8 x i64>
+ %A2 = alloca <1 x double>
+ %A1 = alloca double
+ %A = alloca i32
+ %L = load i8* %0
+ store i8 77, i8* %0
+ %E = extractelement <8 x i64> zeroinitializer, i32 2
+ %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15, i32 1, i32 3>
+ %I = insertelement <8 x i64> zeroinitializer, i64 %E, i32 7
+ %Sl = select i1 false, i8* %0, i8* %0
+ %Cmp = icmp eq i32 434069, 272505
+ br label %CF
+
+CF: ; preds = %CF, %CF78, %BB
+ %L5 = load i8* %Sl
+ store i8 %L, i8* %Sl
+ %E6 = extractelement <8 x i32> zeroinitializer, i32 2
+ %Shuff7 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef>
+ %I8 = insertelement <8 x i64> zeroinitializer, i64 %4, i32 7
+ %B = shl <1 x i16> zeroinitializer, zeroinitializer
+ %FC = sitofp <8 x i64> zeroinitializer to <8 x float>
+ %Sl9 = select i1 %Cmp, i8 77, i8 77
+ %Cmp10 = icmp uge <8 x i64> %Shuff, zeroinitializer
+ %L11 = load i8* %0
+ store i8 %Sl9, i8* %0
+ %E12 = extractelement <1 x i16> zeroinitializer, i32 0
+ %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 undef, i32 3, i32 5, i32 7>
+ %I14 = insertelement <4 x i32> zeroinitializer, i32 %3, i32 3
+ %B15 = udiv <1 x i16> %B, zeroinitializer
+ %Tr = trunc <8 x i64> %Shuff to <8 x i32>
+ %Sl16 = select i1 %Cmp, i8 77, i8 %5
+ %Cmp17 = icmp ult <8 x i1> %Cmp10, %Cmp10
+ %L18 = load i8* %Sl
+ store i8 -1, i8* %Sl
+ %E19 = extractelement <8 x i32> zeroinitializer, i32 3
+ %Shuff20 = shufflevector <8 x float> %FC, <8 x float> %FC, <8 x i32> <i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2, i32 undef>
+ %I21 = insertelement <8 x i64> %Shuff13, i64 %E, i32 0
+ %B22 = urem <8 x i64> %Shuff7, %I21
+ %FC23 = sitofp i32 50347 to float
+ %Sl24 = select i1 %Cmp, double 0.000000e+00, double 0.000000e+00
+ %Cmp25 = icmp ugt i32 465489, 47533
+ br i1 %Cmp25, label %CF, label %CF78
+
+CF78: ; preds = %CF
+ %L26 = load i8* %Sl
+ store i32 50347, i32* %A
+ %E27 = extractelement <8 x i1> %Cmp10, i32 2
+ br i1 %E27, label %CF, label %CF77
+
+CF77: ; preds = %CF77, %CF81, %CF78
+ %Shuff28 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef>
+ %I29 = insertelement <1 x i16> zeroinitializer, i16 -1, i32 0
+ %B30 = urem <8 x i32> %Tr, zeroinitializer
+ %Tr31 = trunc i32 0 to i16
+ %Sl32 = select i1 %Cmp, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer
+ %L33 = load i8* %Sl
+ store i8 %L26, i8* %Sl
+ %E34 = extractelement <4 x i32> zeroinitializer, i32 0
+ %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> %B, <1 x i32> undef
+ %I36 = insertelement <8 x i64> %Shuff28, i64 %E, i32 7
+ %B37 = srem <1 x i16> %I29, zeroinitializer
+ %FC38 = sitofp <8 x i32> %B30 to <8 x double>
+ %Sl39 = select i1 %Cmp, double 0.000000e+00, double %Sl24
+ %L40 = load i8* %Sl
+ store i8 %Sl16, i8* %Sl
+ %E41 = extractelement <1 x i16> zeroinitializer, i32 0
+ %Shuff42 = shufflevector <8 x i1> %Cmp17, <8 x i1> %Cmp10, <8 x i32> <i32 14, i32 undef, i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12>
+ %I43 = insertelement <4 x i32> zeroinitializer, i32 272505, i32 0
+ %B44 = urem <8 x i32> %B30, %Tr
+ %PC = bitcast i8* %0 to i64*
+ %Sl45 = select i1 %Cmp, <8 x i1> %Cmp10, <8 x i1> %Shuff42
+ %Cmp46 = fcmp ugt float 0xB856238A00000000, 0x47DA795E40000000
+ br i1 %Cmp46, label %CF77, label %CF80
+
+CF80: ; preds = %CF80, %CF77
+ %L47 = load i64* %PC
+ store i8 77, i8* %Sl
+ %E48 = extractelement <8 x i64> zeroinitializer, i32 2
+ %Shuff49 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff7, <8 x i32> <i32 5, i32 7, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 3>
+ %I50 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7
+ %B51 = fdiv float 0x46CC2D8000000000, %FC23
+ %PC52 = bitcast <8 x i64>* %A3 to i64*
+ %Sl53 = select i1 %Cmp, <8 x i64> %Shuff, <8 x i64> %Shuff
+ %Cmp54 = fcmp ole float 0x47DA795E40000000, 0xB856238A00000000
+ br i1 %Cmp54, label %CF80, label %CF81
+
+CF81: ; preds = %CF80
+ %L55 = load i8* %Sl
+ store i8 %Sl16, i8* %Sl
+ %E56 = extractelement <1 x i16> %B, i32 0
+ %Shuff57 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> <i32 1>
+ %I58 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7
+ %B59 = srem i32 %E19, %E19
+ %Sl60 = select i1 %Cmp, i8 77, i8 77
+ %Cmp61 = icmp ult <1 x i16> zeroinitializer, %B
+ %L62 = load i8* %Sl
+ store i64 %L47, i64* %PC52
+ %E63 = extractelement <4 x i32> %I43, i32 2
+ %Shuff64 = shufflevector <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 1, i32 3>
+ %I65 = insertelement <8 x i64> %B22, i64 %L47, i32 7
+ %B66 = add <8 x i64> %I50, %I65
+ %FC67 = uitofp i16 %E12 to float
+ %Sl68 = select i1 %Cmp, <8 x i32> %B30, <8 x i32> zeroinitializer
+ %Cmp69 = fcmp ord double 0.000000e+00, 0.000000e+00
+ br i1 %Cmp69, label %CF77, label %CF79
+
+CF79: ; preds = %CF81
+ %L70 = load i32* %A
+ store i64 %4, i64* %PC
+ %E71 = extractelement <4 x i32> zeroinitializer, i32 0
+ %Shuff72 = shufflevector <8 x i32> zeroinitializer, <8 x i32> %B44, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 undef, i32 7, i32 9>
+ %I73 = insertelement <8 x i16> zeroinitializer, i16 %E12, i32 5
+ %B74 = fsub double 0.000000e+00, 0.000000e+00
+ %Sl75 = select i1 %Cmp46, i32 %E6, i32 %E19
+ %Cmp76 = icmp ugt <4 x i32> %I43, zeroinitializer
+ store i8 %L, i8* %Sl
+ store i64 %L47, i64* %PC
+ store i64 %L47, i64* %PC
+ store i8 %L5, i8* %Sl
+ store i8 %L5, i8* %0
+ ret void
+}