Diffstat (limited to 'lib/CodeGen/SelectionDAG')
23 files changed, 1971 insertions, 607 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 6023326..9a79217 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -10,13 +10,15 @@ add_llvm_library(LLVMSelectionDAG LegalizeTypesGeneric.cpp LegalizeVectorOps.cpp LegalizeVectorTypes.cpp + ResourcePriorityQueue.cpp ScheduleDAGFast.cpp - ScheduleDAGRRList.cpp + ScheduleDAGRRList.cpp ScheduleDAGSDNodes.cpp SelectionDAG.cpp SelectionDAGBuilder.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp + ScheduleDAGVLIW.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 80cf0a8..1b148ad 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -63,7 +63,24 @@ namespace { bool LegalTypes; // Worklist of all of the nodes that need to be simplified. - std::vector<SDNode*> WorkList; + // + // This has the semantics that when adding to the worklist, + // the item added must be next to be processed. It should + // also only appear once. The naive approach to this takes + // linear time. + // + // To reduce the insert/remove time to logarithmic, we use + // a set and a vector to maintain our worklist. + // + // The set contains the items on the worklist, but does not + // maintain the order they should be visited. + // + // The vector maintains the order nodes should be visited, but may + // contain duplicate or removed nodes. When choosing a node to + // visit, we pop off the order stack until we find an item that is + // also in the contents set. All operations are O(log N). + SmallPtrSet<SDNode*, 64> WorkListContents; + std::vector<SDNode*> WorkListOrder; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; @@ -83,18 +100,17 @@ namespace { SDValue visit(SDNode *N); public: - /// AddToWorkList - Add to the work list making sure it's instance is at the - /// the back (next to be processed.) + /// AddToWorkList - Add to the work list making sure its instance is at the + /// back (next to be processed.) void AddToWorkList(SDNode *N) { - removeFromWorkList(N); - WorkList.push_back(N); + WorkListContents.insert(N); + WorkListOrder.push_back(N); } /// removeFromWorkList - remove all instances of N from the worklist. /// void removeFromWorkList(SDNode *N) { - WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), - WorkList.end()); + WorkListContents.erase(N); } SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, @@ -158,7 +174,9 @@ namespace { SDValue visitADD(SDNode *N); SDValue visitSUB(SDNode *N); SDValue visitADDC(SDNode *N); + SDValue visitSUBC(SDNode *N); SDValue visitADDE(SDNode *N); + SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); @@ -957,10 +975,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - WorkList.reserve(DAG.allnodes_size()); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) - WorkList.push_back(I); + AddToWorkList(I); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -971,11 +988,18 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // done. Set it to null to avoid confusion. 
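The two-structure worklist introduced above can be sketched in isolation like this (a minimal standalone model with hypothetical names; the actual combiner keeps this state in member fields and uses llvm::SmallPtrSet rather than std::set):

    #include <set>
    #include <vector>

    // Membership lives in a set (O(log N) insert/erase); visit order lives in
    // a vector that may hold stale duplicates, which pop() silently discards.
    template <typename T> class DedupWorkList {
      std::set<T> Contents;   // what should still be visited
      std::vector<T> Order;   // visit order, possibly with stale entries
    public:
      void push(T N) { Contents.insert(N); Order.push_back(N); }
      void remove(T N) { Contents.erase(N); } // Order entries go stale lazily
      bool empty() const { return Contents.empty(); }
      T pop() { // precondition: !empty()
        T N;
        do {
          N = Order.back();
          Order.pop_back();
        } while (!Contents.erase(N)); // skip removed or superseded entries
        return N;
      }
    };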
DAG.setRoot(SDValue()); - // while the worklist isn't empty, inspect the node on the end of it and + // while the worklist isn't empty, find a node and // try and combine it. - while (!WorkList.empty()) { - SDNode *N = WorkList.back(); - WorkList.pop_back(); + while (!WorkListContents.empty()) { + SDNode *N; + // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // In order to avoid a linear scan, we use a set (O(log N)) to hold what the + // worklist *should* contain, and check that the node we want to visit should + // actually be visited. + do { + N = WorkListOrder.back(); + WorkListOrder.pop_back(); + } while (!WorkListContents.erase(N)); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a @@ -1059,7 +1083,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ADD: return visitADD(N); case ISD::SUB: return visitSUB(N); case ISD::ADDC: return visitADDC(N); + case ISD::SUBC: return visitSUBC(N); case ISD::ADDE: return visitADDE(N); + case ISD::SUBE: return visitSUBE(N); case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); @@ -1497,8 +1523,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { EVT VT = N0.getValueType(); // If the flag result is dead, turn this into an ADD. - if (N->hasNUsesOfValue(0, 1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), MVT::Glue)); @@ -1546,7 +1572,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); return SDValue(); } @@ -1656,6 +1682,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitSUBC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into a SUB. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, x) -> 0 + no borrow + if (N0 == N1) + return CombineTo(N, DAG.getConstant(0, VT), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // fold (subc x, 0) -> x + no borrow + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + // Canonicalize (sub -1, x) -> ~x, i.e.
(xor x, -1) + no borrow + if (N0C && N0C->isAllOnesValue()) + return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + MVT::Glue)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2320,6 +2391,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must + // already be zero by virtue of the width of the base type of the load. + // + // the 'X' node here can either be nothing or an extract_vector_elt to catch + // more cases. + if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getOpcode() == ISD::LOAD) || + N0.getOpcode() == ISD::LOAD) { + LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? + N0 : N0.getOperand(0) ); + + // Get the constant (if applicable) the zero'th operand is being ANDed with. + // This can be a pure constant or a vector splat, in which case we treat the + // vector as a scalar and use the splat value. + APInt Constant = APInt::getNullValue(1); + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + Constant = C->getAPIntValue(); + } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs); + if (IsSplat) { + // Undef bits can contribute to a possible optimisation if set, so + // set them. + SplatValue |= SplatUndef; + + // The splat value may be something like "0x00FFFFFF", which means 0 for + // the first vector value and FF for the rest, repeating. We need a mask + // that will apply equally to all members of the vector, so AND all the + // lanes of the constant together. + EVT VT = Vector->getValueType(0); + unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = + Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. 
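The mask-narrowing test above can be illustrated with a small standalone check (illustrative only, using LLVM's APInt; the combiner additionally handles vector splats by ANDing all the lanes of the splat constant together first):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // An AND mask is redundant for a load zero-extended from MemBits bits
    // iff, truncated to those MemBits, it is all ones: the extended bits are
    // already known to be zero.
    bool andIsRedundant(const APInt &Mask, unsigned MemBits) {
      return Mask.zextOrTrunc(MemBits).isAllOnesValue();
    }
    // e.g. (and (zextload i8 -> i32), 0x000000FF): andIsRedundant(Mask, 8)
    // is true, so the AND folds away; with mask 0x0000007F it must stay,
    // since a bit of the loaded byte is actually cleared.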
+ SDValue NewLoad(Load, 0); + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), Load->getDebugLoc(), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + + // Fold the AND away, taking care not to fold to the old load node if we + // replaced it. + CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); @@ -3331,7 +3484,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) - if (N1C && N0.getOpcode() == ISD::SRL && + // Only fold this if the inner shift has no other uses -- if it does, folding + // this will increase the total number of instructions. + if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() && N0.getOperand(1).getOpcode() == ISD::Constant) { uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); if (c1 < VT.getSizeInBits()) { @@ -4203,6 +4358,29 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0.getOperand(0)); + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. + if (N0.getOpcode() == ISD::TRUNCATE && !VT.isVector()) { + SDValue Op = N0.getOperand(0); + APInt TruncatedBits + = APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(Op, TruncatedBits, KnownZero, KnownOne); + if (TruncatedBits == KnownZero) { + if (VT.bitsGT(Op.getValueType())) + return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + if (VT.bitsLT(Op.getValueType())) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + + return Op; + } + } + // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4883,6 +5061,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + bool isLE = TLI.isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -4910,6 +5089,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Fold extract-and-trunc into a narrow extract. For example: + // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) + // i32 y = TRUNCATE(i64 x) + // -- becomes -- + // v16i8 b = BITCAST (v2i64 val) + // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) + // + // Note: We only run this optimization after type legalization (which often + // creates this pattern) and before operation legalization after which + // we need to be more careful about the vector instructions that we generate. 
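The little-endian/big-endian index arithmetic used by this extract-and-trunc fold is easy to check in isolation (a sketch; the function name is hypothetical):

    // Sub-element index of the low (kept-by-truncate) part of wide lane Elt
    // once the vector is bitcast to SizeRatio-times-narrower elements.
    unsigned narrowExtractIndex(unsigned Elt, unsigned SizeRatio, bool IsLE) {
      return IsLE ? Elt * SizeRatio : Elt * SizeRatio + (SizeRatio - 1);
    }
    // For the comment's example (v2i64 viewed as v16i8, SizeRatio = 8,
    // Elt = 1): little-endian gives lane 8, big-endian would give lane 15.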
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse()) { + + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + NVT, N0.getOperand(0)); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + N->getDebugLoc(), TrTy, V, + DAG.getConstant(Index, MVT::i32)); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y @@ -5910,6 +6127,44 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } +/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that +/// uses N as its base pointer and that N may be folded in the load / store +/// addressing mode. FIXME: This currently only looks for folding of +/// [reg +/- imm] addressing modes. +static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = Use->getValueType(0); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getValue().getValueType(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + AM.BaseOffs = Offset->getSExtValue(); + else + return false; + } else if (N->getOpcode() == ISD::SUB) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + AM.BaseOffs = -Offset->getSExtValue(); + else + return false; + } else + return false; + + return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); +} + /// CombineToPreIndexedLoadStore - Try turning a load / store into a /// pre-indexed load / store when the base pointer is an add or subtract /// and it has other uses besides the load / store. After the @@ -5996,10 +6251,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (N->hasPredecessorHelper(Use, Visited, Worklist)) return false; - if (!((Use->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(Use)->getBasePtr() == Ptr) || - (Use->getOpcode() == ISD::STORE && - cast<StoreSDNode>(Use)->getBasePtr() == Ptr))) + // If Ptr may be folded in addressing mode of other use, then it's + // not profitable to do this transformation. + if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) RealUse = true; } @@ -6096,7 +6350,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { continue; // Try turning it into a post-indexed load / store except when - // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded as addressing mode). + // 2) Op must be independent of N, i.e.
Op is neither a predecessor // nor a successor of N. Otherwise, if Op is folded that would // create a cycle. @@ -6119,10 +6374,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!((UseUse->getOpcode() == ISD::LOAD && - cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) || - (UseUse->getOpcode() == ISD::STORE && - cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use))) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -6189,7 +6441,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (!LD->isVolatile()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. - if (N->hasNUsesOfValue(0, 0)) { + if (!N->hasAnyUseOfValue(0)) { // It's not safe to use the two value CombineTo variant here. e.g. // v1, chain2 = load chain1, loc // v2, chain3 = load chain2, loc @@ -6214,7 +6466,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); @@ -6873,13 +7125,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. SDValue InOp = InVec.getOperand(0); - EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { assert(InOp.getValueType().isInteger() && NVT.isInteger()); return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); @@ -6887,6 +7140,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return InOp; } + SDValue EltNo = N->getOperand(1); + bool ConstEltNo = isa<ConstantSDNode>(EltNo); + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD patterns. + // For example on AVX, extracting elements from a wide vector without using + // extract_subvector. + if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE + && ConstEltNo && !LegalOperations) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(Elt); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. + if (OrigElt < NumElem) { + InVec = InVec->getOperand(0); + } else { + InVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, + InVec, DAG.getConstant(OrigElt, MVT::i32)); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. 
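The index remapping performed by the extract-of-shuffle transform above can be expressed as a small pure function (a sketch with hypothetical names):

    #include <utility>

    // Given the shuffle mask entry for the extracted lane, pick the source
    // operand (0 or 1) and the lane inside it; -1 means the lane is undef.
    std::pair<int, int> remapShuffleExtract(int MaskElt, int NumElem) {
      if (MaskElt < 0) return std::make_pair(-1, -1); // extract of undef lane
      if (MaskElt < NumElem) return std::make_pair(0, MaskElt);
      return std::make_pair(1, MaskElt - NumElem);
    }
    // e.g. for a v4i32 shuffle with mask <4,1,6,3>, extracting element 2
    // (mask entry 6) becomes an extract of lane 2 from the second operand.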
if (!LegalOperations) return SDValue(); @@ -6894,17 +7179,19 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) - SDValue EltNo = N->getOperand(1); - if (isa<ConstantSDNode>(EltNo)) { + if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); bool NewLoad = false; bool BCNumEltsChanged = false; - EVT VT = InVec.getValueType(); EVT ExtVT = VT.getVectorElementType(); EVT LVT = ExtVT; if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + EVT BCVT = InVec.getOperand(0).getValueType(); if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) return SDValue(); @@ -6922,12 +7209,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.getOperand(0).getValueType() == ExtVT && ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) // => // (load $addr+1*size) + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + // If the bit convert changed the number of elements, it is unsafe // to examine the mask. if (BCNumEltsChanged) @@ -6938,14 +7233,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); - if (InVec.getOpcode() == ISD::BITCAST) + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + InVec = InVec.getOperand(0); + } if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; } } + // Make sure we found a non-volatile load and the extractelement is + // the only use. if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) return SDValue(); @@ -6982,6 +7284,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. SDValue Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), @@ -7011,11 +7316,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // optimizations. We do not handle sign-extend because we can't fill the sign // using shuffles. EVT SourceType = MVT::Other; - bool allAnyExt = true; - for (unsigned i = 0; i < NumInScalars; ++i) { + bool AllAnyExt = true; + bool AllUndef = true; + for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); // Ignore undef inputs. 
if (In.getOpcode() == ISD::UNDEF) continue; + AllUndef = false; bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; @@ -7040,15 +7347,17 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // Check if all of the extends are ANY_EXTENDs. - allAnyExt &= AnyExt; + AllAnyExt &= AnyExt; } + if (AllUndef) + return DAG.getUNDEF(VT); // In order to have valid types, all of the inputs must be extended from the // same source type and all of the inputs must be any or zero extend. // Scalar sizes must be a power of two. EVT OutScalarTy = N->getValueType(0).getScalarType(); - bool validTypes = SourceType != MVT::Other && + bool ValidTypes = SourceType != MVT::Other && isPowerOf2_32(OutScalarTy.getSizeInBits()) && isPowerOf2_32(SourceType.getSizeInBits()); @@ -7058,11 +7367,12 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // will be type-legalized to complex code sequences. // We perform this optimization only before the operation legalizer because we // may introduce illegal operations. - if (LegalTypes && !LegalOperations && validTypes) { + if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && + ValidTypes) { bool isLE = TLI.isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); - SDValue Filler = allAnyExt ? DAG.getUNDEF(SourceType): + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): DAG.getConstant(0, SourceType); unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); @@ -7117,15 +7427,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { break; } - // If the input vector type disagrees with the result of the build_vector, - // we can't make a shuffle. + // We allow up to two distinct input vectors. SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); - if (ExtractedFromVec.getValueType() != VT) { - VecIn1 = VecIn2 = SDValue(0, 0); - break; - } - - // Otherwise, remember this. We allow up to two distinct input vectors. if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; @@ -7140,7 +7443,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -7166,14 +7469,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Mask.push_back(Idx+NumInScalars); } - // Add count and size info. + // We can't generate a shuffle node with mismatched input and output types. + // Attempt to transform a single input vector to the correct type. + if ((VT != VecIn1.getValueType())) { + // We don't support shuffling between TWO values of different types. + if (VecIn2.getNode() != 0) + return SDValue(); + + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) + return SDValue(); + + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } + + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Check that we were able to transform all incoming values to the same type.
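The lane layout produced by this extend-elimination is worth spelling out (a sketch; the real code builds a narrow BUILD_VECTOR of sources and Filler values and bitcasts it back):

    // In the rebuilt narrow vector, source scalar i occupies one narrow lane
    // per wide element; the remaining ElemRatio-1 lanes take the Filler
    // (zero for zero_extend, undef when every input was any_extend).
    bool isSourceLane(unsigned Lane, unsigned ElemRatio, bool IsLE,
                      unsigned &SrcIdx) {
      unsigned Offset = IsLE ? 0 : ElemRatio - 1; // low-part position per lane
      if (Lane % ElemRatio != Offset)
        return false;                             // this lane holds the filler
      SrcIdx = Lane / ElemRatio;
      return true;
    }
    // e.g. v4i32 built from (zero_extend i8 x0..x3), ElemRatio = 4, little-
    // endian: the v16i8 lanes are <x0,0,0,0, x1,0,0,0, x2,0,0,0, x3,0,0,0>.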
+ if (VecIn2.getValueType() != VecIn1.getValueType() || + VecIn1.getValueType() != VT) + return SDValue(); + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. if (!isTypeLegal(VT)) return SDValue(); // Return the new VECTOR_SHUFFLE node. SDValue Ops[2]; Ops[0] = VecIn1; - Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + Ops[1] = VecIn2; return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); } @@ -7232,15 +7560,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { unsigned NumElts = VT.getVectorNumElements(); SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); assert(N0.getValueType().getVectorNumElements() == NumElts && "Vector shuffle must be normalized in DAG"); - // FIXME: implement canonicalizations from DAG.getVectorShuffle() + // Canonicalize shuffle undef, undef -> undef + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + // Canonicalize shuffle v, v -> v, undef + if (N0 == N1) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) Idx -= NumElts; + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N0.getOpcode() == ISD::UNDEF) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx < 0) + NewMask.push_back(Idx); + else if (Idx < (int)NumElts) + NewMask.push_back(Idx + NumElts); + else + NewMask.push_back(Idx - NumElts); + } + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Remove references to rhs if it is undef + if (N1.getOpcode() == ISD::UNDEF) { + bool Changed = false; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) { + Idx = -1; + Changed = true; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); + } // If it is a splat, check if the argument vector is another splat or a // build_vector with all scalar elements the same. - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { SDNode *V = N0.getNode(); @@ -8029,30 +8405,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
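The mask rewrite behind the "shuffle undef, v -> shuffle v, undef" canonicalization above is a simple reflection across the operand boundary (standalone sketch):

    #include <vector>

    // Commute a shuffle mask: entries referring to operand 0 now refer to
    // operand 1 and vice versa; undef (-1) entries are preserved.
    std::vector<int> commuteShuffleMask(const std::vector<int> &Mask,
                                        int NumElts) {
      std::vector<int> NewMask;
      for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
        int Idx = Mask[i];
        if (Idx < 0)            NewMask.push_back(Idx);
        else if (Idx < NumElts) NewMask.push_back(Idx + NumElts);
        else                    NewMask.push_back(Idx - NumElts);
      }
      return NewMask;
    }
    // e.g. NumElts = 4: shuffle undef, v, <4,5,-1,6> becomes
    //      shuffle v, undef, <0,1,-1,2>.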
bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - Ptr = LD->getBasePtr(); - Size = LD->getMemoryVT().getSizeInBits() >> 3; - SrcValue = LD->getSrcValue(); - SrcValueOffset = LD->getSrcValueOffset(); - SrcValueAlign = LD->getOriginalAlignment(); - TBAAInfo = LD->getTBAAInfo(); - return true; - } - if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - Ptr = ST->getBasePtr(); - Size = ST->getMemoryVT().getSizeInBits() >> 3; - SrcValue = ST->getSrcValue(); - SrcValueOffset = ST->getSrcValueOffset(); - SrcValueAlign = ST->getOriginalAlignment(); - TBAAInfo = ST->getTBAAInfo(); - return false; - } - llvm_unreachable("FindAliasInfo expected a memory operand"); + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, + int &SrcValueOffset, + unsigned &SrcValueAlign, + const MDNode *&TBAAInfo) const { + LSBaseSDNode *LS = cast<LSBaseSDNode>(N); + + Ptr = LS->getBasePtr(); + Size = LS->getMemoryVT().getSizeInBits() >> 3; + SrcValue = LS->getSrcValue(); + SrcValueOffset = LS->getSrcValueOffset(); + SrcValueAlign = LS->getOriginalAlignment(); + TBAAInfo = LS->getTBAAInfo(); + return isa<LoadSDNode>(LS); } /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index b4946ec..fd8ce78 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -561,12 +561,19 @@ bool FastISel::SelectCall(const User *I) { return true; } + MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); + ComputeUsesVAFloatArgument(*Call, &MMI); + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. switch (F->getIntrinsicID()) { default: break; + // At -O0 we don't care about the lifetime intrinsics. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call); if (!DIVariable(DI->getVariable()).Verify() || @@ -630,60 +637,6 @@ bool FastISel::SelectCall(const User *I) { } return true; } - case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) - break; - - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(Call, ResultReg); - return true; - } - case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(Call->getType()); - if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) - break; - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(Call); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. 
- unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(Call); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - UpdateValueMap(Call, ResultReg); - - return true; - } case Intrinsic::objectsize: { ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1)); unsigned long long Res = CI->isZero() ? -1ULL : 0; @@ -775,8 +728,8 @@ bool FastISel::SelectBitCast(const User *I) { // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { - TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -1419,8 +1372,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { - // Promote MVT::i1. - if (VT == MVT::i1) + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 90d35cc..8dde919 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { GetReturnInfo(Fn->getReturnType(), Fn->getAttributes().getRetAttributes(), Outs, TLI); CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. 
This is only set up for @@ -95,11 +96,13 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { (TySize >= 8 && isa<ArrayType>(Ty) && cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, + MayNeedSP); } for (; BB != EB; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) @@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { /// argument. This overrides previous frame index entry for this argument, /// if any. void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, - int FI) { + int FI) { ByValArgFrameIndexMap[A] = FI; } @@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; - DEBUG(dbgs() << "Argument does not have assigned frame index!"); + DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); return 0; } +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void llvm::ComputeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo *MMI) +{ + FunctionType *FT = cast<FunctionType>( + I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type* T = I.getArgOperand(i)->getType(); + for (po_iterator<Type*> i = po_begin(T), e = po_end(T); + i != e; ++i) { + if (i->isFloatingPointTy()) { + MMI->setUsesVAFloatArgument(true); + return; + } + } + } + } +} + /// AddCatchInfo - Extract the personality and type infos from an eh.selector /// call, and add them to the specified machine basic block. void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, @@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - SmallPtrSet<const BasicBlock*, 4> Visited; - - // The 'eh.selector' call may not be in the direct successor of a basic block, - // but could be several successors deeper. If we don't find it, try going one - // level further. <rdar://problem/8824861> - while (Visited.insert(SuccBB)) { - for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) { - // Apply the catch info to LPad. 
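To make the ComputeUsesVAFloatArgument change above concrete: the post-order walk over each argument's type is what catches floats nested inside aggregates, not just direct float arguments. An illustration (hypothetical user code, not part of the patch):

    struct S { int i; float f; };   // float hidden inside an aggregate
    void callee(int n, ...);        // variadic callee

    void caller(S s, float g) {
      callee(1, g);  // direct float argument: the flag is set
      callee(1, s);  // no float at the top level, but the type walk finds
    }                // S::f, so usesVAFloatArgument is still set and the
                     // object ends up referencing _fltused under MSVCRT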
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); -#ifndef NDEBUG - if (!FLI.MBBMap[SuccBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); -#endif - return; - } - - const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator()); - if (Br && Br->isUnconditional()) - SuccBB = Br->getSuccessor(0); - else - break; - } -} - /// AddLandingPadInfo - Extract the exception handling information from the /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index cb6fd53..1467d88 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, MI->addOperand(MachineOperand::CreateFPImm(CFP)); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { MI->addOperand(MachineOperand::CreateReg(R->getReg(), false)); + } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { + MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), TGA->getTargetFlags())); @@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { - unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); - unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); - const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); - const TargetRegisterClass *SRC = + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway. + if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); - if (SRC && SRC != RC) { - MRI->setRegClass(NewVReg, SRC); - RC = SRC; + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } } } AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, @@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); - // The MachineInstr constructor adds implicit-def operands. Scan through - // these to determine which are dead. - if (MI->getNumOperands() != 0 && - Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { - // First, collect all used registers. - SmallVector<unsigned, 8> UsedRegs; - for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) - if (F->getOpcode() == ISD::CopyFromReg) - UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); - else { - // Collect declared implicit uses. - const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); - UsedRegs.append(MCID.getImplicitUses(), - MCID.getImplicitUses() + MCID.getNumImplicitUses()); - // In addition to declared implicit uses, we must also check for - // direct RegisterSDNode operands. 
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) - if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - UsedRegs.push_back(Reg); - } - } - // Then mark unused registers as dead. - MI->setPhysRegsDeadExcept(UsedRegs, *TRI); - } - // Add result register values for things that are defined by this // instruction. if (NumResults) @@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // hook knows where in the block to insert the replacement code. MBB->insert(InsertPos, MI); + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector<unsigned, 8> UsedRegs; + // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; - if (Node->hasAnyUseOfValue(i)) - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); - // If there are no uses, mark the register as dead now, so that - // MachineLICM/Sink can see that it's dead. Don't do this if the - // node has a Glue value, for the benefit of targets still using - // Glue for values in physregs. - else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - MI->addRegisterDead(Reg, TRI); + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); } } - // If the instruction has implicit defs and the node doesn't, mark the - // implicit def as dead. If the node has any glue outputs, we don't do this - // because we don't know what implicit defs are being used by glued nodes. - if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) - if (const unsigned *IDList = II.getImplicitDefs()) { - for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); - i != e; ++i) - MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + // Scan the glue chain for any used physregs. + if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { + if (F->getOpcode() == ISD::CopyFromReg) { + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + continue; + } else if (F->getOpcode() == ISD::CopyToReg) { + // Skip CopyToReg nodes that are internal to the glue chain. + continue; + } + // Collect declared implicit uses. + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. 
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } } + } + + // Finally mark unused registers as dead. + if (!UsedRegs.empty() || II.getImplicitDefs()) + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG @@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, Node->dump(); #endif llvm_unreachable("This target-independent node should have been selected!"); - break; case ISD::EntryToken: llvm_unreachable("EntryToken should have been excluded from the schedule!"); - break; case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 75f5761..31df458 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -85,7 +85,7 @@ private: /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const; + ArrayRef<int> Mask) const; void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -177,7 +177,7 @@ public: SDValue SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, - SmallVectorImpl<int> &Mask) const { + ArrayRef<int> Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); unsigned NumDestElts = NVT.getVectorNumElements(); unsigned NumEltsGrowth = NumDestElts / NumMaskElts; @@ -893,7 +893,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->dump( &DAG); dbgs() << "\n"; #endif - assert(0 && "Do not know how to legalize this operator!"); + llvm_unreachable("Do not know how to legalize this operator!"); case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: @@ -910,7 +910,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned load and the target doesn't support it, // expand it. @@ -1079,7 +1079,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; // FALLTHROUGH @@ -1185,7 +1185,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp3 = ST->getValue(); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. 
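The ShuffleWithNarrowerEltType helper above (now taking ArrayRef<int>) grows each mask entry by the element-count ratio; as a standalone sketch:

    #include <vector>

    // Each original mask entry expands to NumEltsGrowth consecutive entries
    // in the narrower-element mask; undef entries stay undef.
    std::vector<int> growShuffleMask(const std::vector<int> &Mask,
                                     unsigned NumEltsGrowth) {
      std::vector<int> NewMask;
      for (unsigned i = 0, e = Mask.size(); i != e; ++i)
        for (unsigned g = 0; g != NumEltsGrowth; ++g)
          NewMask.push_back(Mask[i] < 0
                                ? -1
                                : Mask[i] * (int)NumEltsGrowth + (int)g);
      return NewMask;
    }
    // Matches the comment's example: the v4i32 mask <0,1,0,1> with growth 2
    // becomes the v8i16 mask <0,1,2,3,0,1,2,3>.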
@@ -1290,7 +1290,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ReplaceNode(SDValue(Node, 0), Result); } else { switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, // expand it. @@ -1556,7 +1556,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, EVT OpVT = LHS.getValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { - default: assert(0 && "Unknown condition code action!"); + default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: // Nothing to do. break; @@ -1564,7 +1564,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { - default: assert(0 && "Don't know how to expand this condition!"); + default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break; case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break; @@ -1699,7 +1699,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // If all elements are constants, create a load from the constant pool. if (isConstant) { - std::vector<Constant*> CV; + SmallVector<Constant*, 16> CV; for (unsigned i = 0, e = NumElems; i != e; ++i) { if (ConstantFPSDNode *V = dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { @@ -1788,7 +1788,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); if (!CallInfo.second.getNode()) @@ -1821,7 +1821,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); return CallInfo.first; @@ -1853,7 +1853,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); return CallInfo; @@ -1866,7 +1866,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; @@ -1883,7 +1883,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: 
assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -1898,7 +1898,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -1943,7 +1943,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { - default: assert(0 && "Unexpected request for libcall!"); + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -1985,7 +1985,8 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); // Remainder is loaded back from the stack frame. SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, @@ -2160,7 +2161,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, // offset depending on the data type. 
uint64_t FF; switch (Op0.getValueType().getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported integer type!"); + default: llvm_unreachable("Unsupported integer type!"); case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) @@ -2282,7 +2283,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unhandled Expand type in BSWAP!"); + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); case MVT::i16: Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT)); Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT)); @@ -2339,7 +2340,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl) { switch (Opc) { - default: assert(0 && "Cannot expand this yet!"); + default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); @@ -2438,7 +2439,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -2564,7 +2564,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), Args, DAG, dl); @@ -2641,7 +2641,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), Args, DAG, dl); Results.push_back(CallResult.second); @@ -2795,15 +2795,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(2), dl)); break; case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + SmallVector<int, 32> NewMask; + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); - if (!TLI.isTypeLegal(EltVT)) - EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!TLI.isTypeLegal(EltVT)) { + + EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + + // BUILD_VECTOR operands are allowed to be wider than the element type. + // But if NewEltVT is smaller than EltVT the BUILD_VECTOR does not accept it. + if (NewEltVT.bitsLT(EltVT)) { + + // Convert shuffle node. + // If original node was v4i64 and the new EltVT is i32, + // cast operands to v8i32 and re-build the mask. + + // Calculate new VT, the size of the new VT should be equal to original.
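The FF constants in the ExpandLegalINT_TO_FP table above are IEEE-754 single-precision encodings of powers of two, which is quick to verify (hypothetical helper):

    #include <cstdint>

    // 2^N as a float has a zero mantissa and a biased exponent of 127 + N.
    uint32_t powerOfTwoFloatBits(unsigned N) { return (127u + N) << 23; }
    // powerOfTwoFloatBits(8)  == 0x43800000   (2^8,  the MVT::i8  entry)
    // powerOfTwoFloatBits(16) == 0x47800000   (2^16, the MVT::i16 entry)
    // powerOfTwoFloatBits(32) == 0x4F800000   (2^32, the MVT::i32 entry)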
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits()/NewEltVT.getSizeInBits()); + assert(NewVT.bitsEq(VT)); + + // cast operands to new VT + Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); + + // Convert the shuffle mask + unsigned int factor = NewVT.getVectorNumElements()/VT.getVectorNumElements(); + + // EltVT gets smaller + assert(factor > 0); + + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]); + } + else { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]*factor+fi); + } + } + Mask = NewMask; + VT = NewVT; + } + EltVT = NewEltVT; + } unsigned NumElems = VT.getVectorNumElements(); - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 16> Ops; for (unsigned i = 0; i != NumElems; ++i) { if (Mask[i] < 0) { Ops.push_back(DAG.getUNDEF(EltVT)); @@ -2812,14 +2854,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned Idx = Mask[i]; if (Idx < NumElems) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(0), + Op0, DAG.getIntPtrConstant(Idx))); else Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Node->getOperand(1), + Op1, DAG.getIntPtrConstant(Idx - NumElems))); } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); break; } @@ -2980,7 +3025,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::EXCEPTIONADDR: { - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "Can't expand to unknown register!"); Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg, Node->getValueType(0))); @@ -3520,8 +3565,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } case ISD::VECTOR_SHUFFLE: { - SmallVector<int, 8> Mask; - cast<ShuffleVectorSDNode>(Node)->getMask(Mask); + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); // Cast the two input vectors. Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); @@ -3546,6 +3590,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FPOW: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FPOW, dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0))); + break; + } + case ISD::FLOG2: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FEXP: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0))); + break; + } } // Replace the original node with the legalized result. diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6732d37..e393896 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, case ISD::SETUEQ: LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64; break; - default: assert(false && "Do not know how to soften this setcc!"); + default: llvm_unreachable("Do not know how to soften this setcc!"); } } @@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, switch (SrcVT.getSimpleVT().SimpleTy) { default: - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); case MVT::i32: Parts = TwoE32; break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 1c02c4f..41506d1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -212,9 +212,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: @@ -252,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); } case TargetLowering::TypeWidenVector: - if (OutVT.bitsEq(NInVT)) - // The input is widened to the same size. Convert to the widened value. - return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp)); + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); } return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, @@ -489,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { - SDValue Mask = GetPromotedInteger(N->getOperand(0)); + SDValue Mask = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), @@ -1176,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { switch (Opc) { default: llvm_unreachable("Unhandled atomic intrinsic Expand!"); - break; case ISD::ATOMIC_SWAP: switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type for atomic!"); @@ -1395,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { } } -#if 0 - // FIXME: This code is broken for shifts with a zero amount! // If we know that all of the high bits of the shift amount are zero, then we // can do this as a couple of simple shifts. if ((KnownZero & HighBitMask) == HighBitMask) { - // Compute 32-amt. - SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, - DAG.getConstant(NVTBits, ShTy), - Amt); + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. 
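A standalone check of the identity this comment relies on (plain C++, not the committed DAG code): for 0 <= Amt < 32, Amt ^ 31 equals 31 - Amt, so shifting by 1 and then by Amt ^ 31 moves bits by 32 - Amt in total, and the result degenerates to zero when Amt is 0 instead of invoking an undefined shift by 32:

#include <cassert>
#include <cstdint>

// Expand a 64-bit SHL into 32-bit ops when Amt is known to be < 32.
static void shl64(uint32_t InL, uint32_t InH, unsigned Amt,
                  uint32_t &Lo, uint32_t &Hi) {
  unsigned Amt2 = Amt ^ 31;          // == 31 - Amt, since Amt < 32
  uint32_t Sh1 = InL >> 1;           // move one bit first...
  uint32_t Sh2 = Sh1 >> Amt2;        // ...then the remaining 31 - Amt
  Lo = InL << Amt;
  Hi = (InH << Amt) | Sh2;           // Sh2 is 0 when Amt == 0, as wanted
}

int main() {
  for (unsigned Amt = 0; Amt < 32; ++Amt) {
    uint64_t In = 0x0123456789ABCDEFull;
    uint32_t Lo, Hi;
    shl64((uint32_t)In, (uint32_t)(In >> 32), Amt, Lo, Hi);
    assert(((uint64_t)Hi << 32 | Lo) == (In << Amt));
    assert((Amt ^ 31) == 31 - Amt);  // the XOR identity itself
  }
  return 0;
}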
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits-1, ShTy)); + unsigned Op1, Op2; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); @@ -1412,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; } - Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); - Hi = DAG.getNode(ISD::OR, NVT, - DAG.getNode(Op1, NVT, InH, Amt), - DAG.getNode(Op2, NVT, InL, Amt2)); + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); return true; } -#endif return false; } @@ -1498,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); return true; } - - return false; } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, @@ -2311,8 +2321,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), false, - true, Func, Args, DAG, dl); + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Func, Args, DAG, dl); SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, @@ -2787,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { else if (SrcVT == MVT::i128) FF = APInt(32, F32TwoE128); else - assert(false && "Unsupported UINT_TO_FP!"); + llvm_unreachable("Unsupported UINT_TO_FP!"); // Check whether the sign bit is set. SDValue Lo, Hi; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 04a6a4a..439aa4d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() { for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); switch (getTypeAction(ResultVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: break; // The following calls must take care of *all* of the node's results, @@ -275,8 +273,6 @@ ScanOperands: EVT OpVT = N->getOperand(i).getValueType(); switch (getTypeAction(OpVT)) { - default: - assert(false && "Unknown action!"); case TargetLowering::TypeLegal: continue; // The following calls must either replace all of the node's results @@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { - assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + // Note that in some cases vector operation operands may be greater than + // the vector element type. For example BUILD_VECTOR of type <1 x i1> with + // a constant i8 operand. 
+ assert(Result.getValueType().getSizeInBits() >= + Op.getValueType().getVectorElementType().getSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); @@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), false, - /*isReturnValueUsed=*/true, + false, 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); return CallInfo.first; } @@ -1088,7 +1089,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*isReturnValueUsed=*/true, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); return CallInfo; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 7c5472b..a8ff7c6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -45,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Handle some special cases efficiently. switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 4696c0d..3ae8345 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -141,7 +141,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { EVT ValVT = ST->getValue().getValueType(); if (StVT.isVector() && ST->isTruncatingStore()) switch (TLI.getTruncStoreAction(ValVT, StVT)) { - default: assert(0 && "This action is not supported yet!"); + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7ca0d1e..a8aee12 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -404,6 +404,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; switch (N->getOpcode()) { default: @@ -1565,12 +1569,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { DebugLoc dl = N->getDebugLoc(); switch (getTypeAction(InVT)) { - default: - assert(false && "Unknown type action!"); - break; case TargetLowering::TypeLegal: break; case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. 
InOp = GetPromotedInteger(InOp); @@ -2326,19 +2333,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); + SDValue L; if (LdWidth < NewVTWidth) { // Our current type we are using is too large, find a better size NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); - } - - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment)); - LdChain.push_back(LdOp.getValue(1)); - LdOps.push_back(LdOp); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector<SDValue, 16> Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), + &Loads[0], Loads.size()); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + LdWidth -= NewVTWidth; } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp new file mode 100644 index 0000000..1a27f3f --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -0,0 +1,657 @@ +//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that prioritizes instructions using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// The scheduler is basically a top-down adaptable list scheduler with DFA +// resource tracking added to the cost function. +// DFA is queried as a state machine to model "packets/bundles" during +// schedule. Currently packets/bundles are discarded at the end of +// scheduling, affecting only order of instructions. 
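The packet gating this header comment describes - query the automaton, issue into the current bundle, start a new bundle when the automaton rejects - can be sketched in isolation with a toy stand-in for the target DFA (assumed two-slot issue width; this is not the DFAPacketizer API):

#include <cassert>
#include <vector>

struct ToyDFA {                       // stand-in for a generated automaton
  unsigned Used = 0, Width = 2;
  bool canReserveResources() const { return Used < Width; }
  void reserveResources() { ++Used; }
  void clearResources() { Used = 0; }
};

int main() {
  ToyDFA DFA;
  std::vector<int> Packet;
  int Instrs[] = {1, 2, 3, 4, 5};
  for (int I : Instrs) {
    if (!DFA.canReserveResources()) { // packet full: start a new bundle
      DFA.clearResources();
      Packet.clear();
    }
    DFA.reserveResources();           // issue into the current packet
    Packet.push_back(I);
  }
  assert(Packet.size() <= 2);         // bundles are bounded by the DFA
  return 0;
}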
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" + +using namespace llvm; + +static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable use of DFA during scheduling")); + +static cl::opt<signed> RegPressureThreshold( + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); + + +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : + Picker(this), + InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData()) +{ + TII = IS->getTargetLowering().getTargetMachine().getInstrInfo(); + TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo(); + TLI = &IS->getTargetLowering(); + + const TargetMachine &tm = (*IS->MF).getTarget(); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + // This hard requirement could be relaxed, but for now + // do not let it proceed. + assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); + + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; +} + +unsigned +ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *PredSU = I->getSUnit(); + const SDNode *ScegN = PredSU->getNode(); + + if (!ScegN) + continue; + + // If the value is passed to CopyToReg, it is probably + // live outside the BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: NumberDeps++; break; + case ISD::CopyToReg: break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, + unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *SuccSU = I->getSUnit(); + const SDNode *ScegN = SuccSU->getNode(); + if (!ScegN) + continue; + + // If the value is passed to CopyToReg, it is probably + // live outside the BB.
+ switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +void ResourcePriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. 
+ unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + Queue.push_back(SU); +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getNode()) + return false; + + // If this is a compound instruction, + // it is likely to be a call. Do not delay it. + if (SU->getNode()->getGluedNode()) + return true; + + // First see if the pipeline could receive this instruction + // in the current cycle. + if (SU->getNode()->isMachineOpcode()) + switch (SU->getNode()->getMachineOpcode()) { + default: + if (!ResourcesModel->canReserveResources(&TII->get( + SU->getNode()->getMachineOpcode()))) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + + // Now make sure there are no other dependencies + // on instructions already in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignore order deps. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + + return true; +} + +/// Keep track of available resources. +void ResourcePriorityQueue::reserveResources(SUnit *SU) { + // If this SU does not fit in the packet, + // start a new one. + if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { + ResourcesModel->clearResources(); + Packet.clear(); + } + + if (SU->getNode() && SU->getNode()->isMachineOpcode()) { + switch (SU->getNode()->getMachineOpcode()) { + default: + ResourcesModel->reserveResources(&TII->get( + SU->getNode()->getMachineOpcode())); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + Packet.push_back(SU); + } + // Forcefully end the packet for PseudoOps. + else { + ResourcesModel->clearResources(); + Packet.clear(); + } + + // If the packet is now full, reset the state so in the next cycle + // we start fresh. + if (Packet.size() >= InstrItins->IssueWidth) { + ResourcesModel->clearResources(); + Packet.clear(); + } +} + +signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + // Gen estimate. + for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { + EVT VT = SU->getNode()->getValueType(i); + if (TLI->isTypeLegal(VT) + && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance += numberRCValSuccInSU(SU, RCId); + } + // Kill estimate. + for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { + const SDValue &Op = SU->getNode()->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (isa<ConstantSDNode>(Op.getNode())) + continue; + + if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance -= numberRCValPredInSU(SU, RCId); + } + return RegBalance; +} + +/// Estimates the change in reg pressure from this SU.
+/// This is achieved by trivial tracking of defined +/// and used vregs in dependent instructions. +/// The RawPressure flag makes this function ignore +/// existing reg file sizes and report the raw def/use +/// balance. +signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + if (RawPressure) { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + else { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + if ((RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) > 0) && + (RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + + return RegBalance; +} + +// Constants used to denote the relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 100; +static const unsigned PriorityThree = 50; +static const unsigned PriorityFour = 15; +static const unsigned PriorityFive = 5; +static const unsigned ScaleOne = 20; +static const unsigned ScaleTwo = 10; +static const unsigned ScaleThree = 5; +static const unsigned FactorOne = 2; + +/// Returns a single number reflecting the benefit of scheduling SU +/// in the current cycle. +signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { + // Initial trivial priority. + signed ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Adaptable scheduling: + // a small, but very parallel + // region, where reg pressure is an issue. + if (HorizontalVerticalBalance > RegPressureThreshold) { + // Critical path first. + ResCount += (SU->getHeight() * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + // Consider the change to reg pressure from scheduling + // this SU. + ResCount -= (regPressureDelta(SU,true) * ScaleOne); + } + // Default heuristic: greedy and + // critical path driven. + else { + // Critical path first. + ResCount += (SU->getHeight() * ScaleTwo); + // Now see how many instructions are blocked by this SU. + ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + ResCount -= (regPressureDelta(SU) * ScaleTwo); + } + + // These are platform-specific things. + // They will need to go into the back end + // and be accessed from here via a hook. + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + if (TID.isCall()) + ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); + } + else + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + ResCount += PriorityFive; + break; + + case ISD::INLINEASM: + ResCount += PriorityFour; + break; + } + } + return ResCount; +} + + +/// Main resource tracking point.
+void ResourcePriorityQueue::ScheduledNode(SUnit *SU) { + // Use NULL entry as an event marker to reset + // the DFA state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + return; + } + + const SDNode *ScegN = SU->getNode(); + // Update reg pressure tracking. + // First update current node. + if (ScegN->isMachineOpcode()) { + // Estimate generated regs. + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + EVT VT = ScegN->getValueType(i); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) + RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); + } + } + // Estimate killed regs. + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) { + if (RegPressure[RC->getID()] > + (numberRCValPredInSU(SU, RC->getID()))) + RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); + else RegPressure[RC->getID()] = 0; + } + } + } + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0)) + continue; + --I->getSUnit()->NumRegDefsLeft; + } + } + + // Reserve resources for this SU. + reserveResources(SU); + + // Adjust number of parallel live ranges. + // Heuristic is simple - node with no data successors reduces + // number of live ranges. All others, increase it. + unsigned NumberNonControlDeps = 0; + + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + adjustPriorityOfUnscheduledPreds(I->getSUnit()); + if (!I->isCtrl()) + NumberNonControlDeps++; + } + + if (!NumberNonControlDeps) { + if (ParallelLiveRanges >= SU->NumPreds) + ParallelLiveRanges -= SU->NumPreds; + else + ParallelLiveRanges = 0; + + } + else + ParallelLiveRanges += SU->NumRegDefsLeft; + + // Track parallel live chains. + HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); + HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); +} + +void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { + unsigned NodeNumDefs = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + // No register need be allocated for this. + if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + NodeNumDefs = 0; + break; + } + NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); + } + else + switch(N->getOpcode()) { + default: break; + case ISD::CopyFromReg: + NodeNumDefs++; + break; + case ISD::INLINEASM: + NodeNumDefs++; + break; + } + + SU->NumRegDefsLeft = NodeNumDefs; +} + +/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. 
+ + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + + +/// Main access point - returns the next instruction +/// to be placed in the scheduling sequence. +SUnit *ResourcePriorityQueue::pop() { + if (empty()) + return 0; + + std::vector<SUnit *>::iterator Best = Queue.begin(); + if (!DisableDFASched) { + signed BestCost = SUSchedulingCost(*Best); + for (std::vector<SUnit *>::iterator I = Queue.begin(), + E = Queue.end(); I != E; ++I) { + if (*I == *Best) + continue; + + if (SUSchedulingCost(*I) > BestCost) { + BestCost = SUSchedulingCost(*I); + Best = I; + } + } + } + // Use the default TD scheduling mechanism. + else { + for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + } + + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + + Queue.pop_back(); + + return V; +} + + +void ResourcePriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + + Queue.pop_back(); +} + + +#ifdef NDEBUG +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const { + ResourcePriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index b275c63..34ee1f3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, Added = true; } } - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { if (RegAdded.insert(*Alias)) { LRegs.push_back(*Alias); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 80162d7..1017d36 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1175,7 +1175,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SmallSet<unsigned, 4> &RegAdded, SmallVector<unsigned, 4> &LRegs, const TargetRegisterInfo *TRI) { - for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { // Check if Ref is live. if (!LiveRegDefs[*AliasI]) continue; @@ -1190,6 +1190,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, } } +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVector<unsigned, 4> &LRegs) { + // Look at all live registers. Skip Reg0 and the special CallResource.
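The loop that follows tests clobbering through MachineOperand::clobbersPhysReg. A standalone sketch of the mask encoding it assumes (a set bit marks a register as preserved across the call, so a clear bit means clobbered; toy register numbers, not in-tree code):

#include <cassert>
#include <cstdint>

static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
  // Preserved-bit clear => the register is clobbered.
  return !(RegMask[PhysReg / 32] & (1u << PhysReg % 32));
}

int main() {
  uint32_t Mask[2] = {0, 0};
  Mask[0] |= 1u << 5;                 // mark register 5 as preserved
  assert(!clobbersPhysReg(Mask, 5));  // preserved, not clobbered
  assert(clobbersPhysReg(Mask, 6));   // everything else is clobbered
  assert(clobbersPhysReg(Mask, 40));  // the second mask word works too
  return 0;
}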
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i)) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (const RegisterMaskSDNode *Op = + dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode())) + return Op->getRegMask(); + return NULL; +} + /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay /// scheduling of the given node to satisfy live physical register dependencies. /// If the specific node is the last one that's available to schedule, do @@ -1255,6 +1280,9 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { LRegs.push_back(CallResource); } } + if (const uint32_t *RegMask = getNodeRegMask(Node)) + CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; @@ -2635,7 +2663,8 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, const TargetRegisterInfo *TRI) { const unsigned *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); - if(!ImpDefs) + const uint32_t *RegMask = getNodeRegMask(SU->getNode()); + if(!ImpDefs && !RegMask) return false; for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); @@ -2646,14 +2675,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, if (!PI->isAssignedRegDep()) continue; - for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { - // Return true if SU clobbers this physical register use and the - // definition of the register reaches from DepSU. IsReachable queries a - // topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, PI->getReg()) && - scheduleDAG->IsReachable(DepSU, PI->getSUnit())) - return true; - } + if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + + if (ImpDefs) + for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). 
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; } } return false; @@ -2674,8 +2707,9 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, continue; const unsigned *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); - if (!SUImpDefs) - return false; + const uint32_t *SURegMask = getNodeRegMask(SUNode); + if (!SUImpDefs && !SURegMask) + continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue || VT == MVT::Other) @@ -2683,6 +2717,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!N->hasAnyUseOfValue(i)) continue; unsigned Reg = ImpDefs[i - NumDefs]; + if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) + return true; + if (!SUImpDefs) + continue; for (;*SUImpDefs; ++SUImpDefs) { unsigned SUReg = *SUImpDefs; if (TRI->regsOverlap(Reg, SUReg)) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 9c27b2e..17b4901 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -53,6 +53,7 @@ namespace llvm { if (isa<ConstantSDNode>(Node)) return true; if (isa<ConstantFPSDNode>(Node)) return true; if (isa<RegisterSDNode>(Node)) return true; + if (isa<RegisterMaskSDNode>(Node)) return true; if (isa<GlobalAddressSDNode>(Node)) return true; if (isa<BasicBlockSDNode>(Node)) return true; if (isa<FrameIndexSDNode>(Node)) return true; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp new file mode 100644 index 0000000..7d12509 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -0,0 +1,276 @@ +//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. 
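Stripped of hazard checks and latency, the algorithm this header comment describes reduces to a priority queue plus predecessor counting. A minimal standalone sketch with a made-up three-node graph (not the in-tree scheduler):

#include <cassert>
#include <queue>
#include <vector>

struct Node { int Prio; int PredsLeft; std::vector<int> Succs; };

int main() {
  // Nodes 0 and 1 both feed node 2; node 1 has the higher priority.
  std::vector<Node> G = {{5, 0, {2}}, {9, 0, {2}}, {1, 2, {}}};
  auto Cmp = [&](int A, int B) { return G[A].Prio < G[B].Prio; };
  std::priority_queue<int, std::vector<int>, decltype(Cmp)> Avail(Cmp);
  for (int i = 0; i != (int)G.size(); ++i)
    if (G[i].PredsLeft == 0)
      Avail.push(i);                    // leaves start out available

  std::vector<int> Sequence;
  while (!Avail.empty()) {
    int N = Avail.top();                // highest priority first
    Avail.pop();
    Sequence.push_back(N);              // (hazard checks elided here)
    for (int S : G[N].Succs)
      if (--G[S].PredsLeft == 0)        // last predecessor scheduled:
        Avail.push(S);                  // the successor becomes available
  }
  assert((Sequence == std::vector<int>{1, 0, 2}));
  return 0;
}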
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include <climits> +using namespace llvm; + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + VLIWScheduler("vliw-td", "VLIW scheduler", + createVLIWDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This +/// supports top-down scheduling. +/// +class ScheduleDAGVLIW : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + +public: + ScheduleDAGVLIW(MachineFunction &mf, + AliasAnalysis *aa, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } + + ~ScheduleDAGVLIW() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void releaseSucc(SUnit *SU, const SDep &D); + void releaseSuccessors(SUnit *SU); + void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void listScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGVLIW::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); + + // Build the scheduling graph. + BuildSchedGraph(AA); + + AvailableQueue->initNodes(SUnits); + + listScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed!
***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + PendingQueue.push_back(SuccSU); + } +} + +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + releaseSucc(SU, *I); + } +} + +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + releaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// listScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGVLIW::listScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + releaseSuccessors(&EntrySU); + + // All leaves to AvailableQueue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While AvailableQueue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } + else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + // Reset DFA state. + AvailableQueue->ScheduledNode(0); + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. 
+ if (FoundSUnit) { + scheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(dbgs() << "*** Emitting noop\n"); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createVLIWDAGScheduler - This creates a top-down list scheduler. +ScheduleDAGSDNodes * +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); +} diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index dd626e2..796abf4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -383,7 +383,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::Register: ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); break; - + case ISD::RegisterMask: + ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); + break; case ISD::SRCVALUE: ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); break; @@ -1037,10 +1039,8 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); - } else { - assert(0 && "Unsupported type in getConstantFP"); - return SDValue(); - } + } else + llvm_unreachable("Unsupported type in getConstantFP"); } SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, @@ -1375,6 +1375,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(N, 0); } +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + ID.AddPointer(RegMask); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; @@ -2229,8 +2243,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::SUB: Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); @@ -2259,8 +2272,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // is, at worst, one more bit 
than the inputs. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. - return std::min(Tmp, Tmp2)-1; - break; + return std::min(Tmp, Tmp2)-1; case ISD::TRUNCATE: // FIXME: it's tricky to do anything useful for this, but it is an important // case for targets like X86. @@ -2571,17 +2583,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::TRUNCATE) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || - OpOpcode == ISD::ANY_EXTEND) { + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { // If the source is smaller than the dest, we still need an extend. if (Operand.getNode()->getOperand(0).getValueType().getScalarType() .bitsLT(VT.getScalarType())) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); - else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); - else - return Operand.getNode()->getOperand(0); + return Operand.getNode()->getOperand(0); } + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); break; case ISD::BITCAST: // Basic sanity checking. @@ -3143,16 +3156,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, case ISD::SELECT: if (N1C) { if (N1C->getZExtValue()) - return N2; // select true, X, Y -> X - else - return N3; // select false, X, Y -> Y + return N2; // select true, X, Y -> X + return N3; // select false, X, Y -> Y } if (N2 == N3) return N2; // select C, X, X -> X break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); - break; case ISD::INSERT_SUBVECTOR: { SDValue Index = N3; if (VT.isSimple() && N1.getValueType().isSimple() @@ -3285,8 +3296,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, - const TargetLowering &TLI, - std::string &Str, unsigned Offset) { + const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { if (VT.isInteger()) @@ -3304,15 +3314,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumBits = VT.getSizeInBits(); - unsigned MSB = NumBits / 8; + unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + uint64_t Val = 0; - if (TLI.isLittleEndian()) - Offset = Offset + MSB - 1; - for (unsigned i = 0; i != MSB; ++i) { - Val = (Val << 8) | (unsigned char)Str[Offset]; - Offset += TLI.isLittleEndian() ? -1 : 1; + if (TLI.isLittleEndian()) { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << i*8; + } else { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } + return DAG.getConstant(Val, VT); } @@ -3327,7 +3340,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, /// isMemSrcFromString - Returns true if memcpy source is a string constant. 
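The byte-packing loops added above can be checked in isolation: the constant's in-memory bytes must match the source string for either endianness. A standalone sketch (plain C++, an assumed 4-byte value type, not the committed code):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>

static uint64_t packBytes(const std::string &Str, unsigned NumVTBytes,
                          bool LittleEndian) {
  unsigned NumBytes = (unsigned)std::min<size_t>(NumVTBytes, Str.size());
  uint64_t Val = 0;
  if (LittleEndian) {
    for (unsigned i = 0; i != NumBytes; ++i)
      Val |= (uint64_t)(unsigned char)Str[i] << i * 8;           // byte i low
  } else {
    for (unsigned i = 0; i != NumBytes; ++i)
      Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes - i - 1) * 8;
  }
  return Val;
}

int main() {
  // "abcd" as an i32 constant: the first byte lands lowest on little-endian
  // and highest on big-endian, matching a byte-wise store of the string.
  assert(packBytes("abcd", 4, true) == 0x64636261u);   // LE: 'd''c''b''a'
  assert(packBytes("abcd", 4, false) == 0x61626364u);  // BE: 'a''b''c''d'
  return 0;
}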
/// -static bool isMemSrcFromString(SDValue Src, std::string &Str) { +static bool isMemSrcFromString(SDValue Src, StringRef &Str) { unsigned SrcDelta = 0; GlobalAddressSDNode *G = NULL; if (Src.getOpcode() == ISD::GlobalAddress) @@ -3341,11 +3354,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { if (!G) return false; - const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); - if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false)) - return true; - - return false; + return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); } /// FindOptimalMemOpLowering - Determines the optimial series memory ops @@ -3448,7 +3457,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; - std::string Str; + StringRef Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); @@ -3485,7 +3494,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // We only handle zero vectors here. // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. - Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, @@ -3727,8 +3736,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), Args, *this, dl); @@ -3779,8 +3789,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), Args, *this, dl); @@ -3839,8 +3850,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), false, - /*isReturnValueUsed=*/false, + TLI.getLibcallCallingConv(RTLIB::MEMSET), + /*isTailCall=*/false, + /*doesNotReturn*/false, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), Args, *this, dl); @@ -5901,7 +5913,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { if (G) if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->get(getMachineOpcode()).getName(); + return TII->getName(getMachineOpcode()); return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; } if (G) { @@ -5945,7 +5957,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case 
ISD::BasicBlock: return "BasicBlock"; case ISD::VALUETYPE: return "ValueType"; case ISD::Register: return "Register"; - + case ISD::RegisterMask: return "RegisterMask"; case ISD::Constant: return "Constant"; case ISD::ConstantFP: return "ConstantFP"; case ISD::GlobalAddress: return "GlobalAddress"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 68c9514..4e4aa11 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, // FP_ROUND's are always exact here. if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getIntPtrConstant(1)); + DAG.getTargetConstant(1, TLI.getPointerTy())); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); - return SDValue(); } /// getCopyFromParts - Create a value that contains the specified legal parts @@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. @@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. - assert(PartVT.isInteger() && ValueVT.isInteger() && + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && "Unknown mismatch!"); ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } // The value may have changed - recompute ValueVT. @@ -966,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1056,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getMergeValues(&Constants[0], Constants.size(), getCurDebugLoc()); } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. 
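// (Illustration of the flattening loop below, assuming a hypothetical
//  <4 x i32> ConstantDataVector: each getElementAsConstant(i) produces a
//  single-result constant node, so Ops ends up as { C0:0, C1:0, C2:0, C3:0 }
//  and feeds one BUILD_VECTOR of type v4i32; for an array-typed constant the
//  same Ops feed getMergeValues instead, as the isa<ArrayType> check shows.)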
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa<ArrayType>(CDS->getType())) + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && @@ -1090,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. SmallVector<SDValue, 16> Ops; - if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) { + if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { for (unsigned i = 0; i != NumElements; ++i) - Ops.push_back(getValue(CP->getOperand(i))); + Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); EVT EltVT = TLI.getValueType(VecTy->getElementType()); @@ -1128,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } llvm_unreachable("Can't get register for value!"); - return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { @@ -1287,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src, - MachineBasicBlock *Dst) { +uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; if (!BPI) return 0; @@ -1824,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { -} - void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); } @@ -1839,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); AddLandingPadInfo(LP, MMI, MBB); + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother to create these DAG nodes. + if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) + return; + SmallVector<EVT, 2> ValueVTs; ComputeValueVTs(TLI, LP.getType(), ValueVTs); @@ -2194,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); - Constant *C = Pivot->Low; + const Constant *C = Pivot->Low; MachineBasicBlock *FalseBB = 0, *TrueBB = 0; // We know that we branch to the LHS if the Value being switched on is @@ -2387,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases - for (size_t i = 1; i < SI.getNumSuccessors(); ++i) { - BasicBlock *SuccBB = SI.getSuccessor(i); + for (size_t i = 0; i < SI.getNumCases(); ++i) { + BasicBlock *SuccBB = SI.getCaseSuccessor(i); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; uint32_t ExtraWeight = BPI ? 
BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0; - Cases.push_back(Case(SI.getSuccessorValue(i), - SI.getSuccessorValue(i), + Cases.push_back(Case(SI.getCaseValue(i), + SI.getCaseValue(i), SMBB, ExtraWeight)); } std::sort(Cases.begin(), Cases.end(), CaseCmp()); @@ -2461,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If there is only the default destination, branch to it if it is not the // next basic block. Otherwise, just fall through. - if (SI.getNumCases() == 1) { + if (!SI.getNumCases()) { // Update machine-CFG edges. // If this is not a fall-through branch, emit the branch. @@ -2691,7 +2715,8 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), - DestVT, N, DAG.getIntPtrConstant(0))); + DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I){ @@ -2778,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { TLI.getValueType(I.getType()), InVec, InIdx)); } -// Utility for visitShuffleVector - Returns true if the mask is mask starting -// from SIndx and increasing to the element length (undefs are allowed). -static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) { - unsigned MaskNumElts = Mask.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) - if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx))) +// Utility for visitShuffleVector - Return true if every element in Mask, +// beginning from position Pos and ending in Pos+Size, falls within the +// specified sequential range [L, L+Size) or is undef. +static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, + int Pos, int Size, int Low) { + for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low) + if (Mask[i] >= 0 && Mask[i] != Low) return false; return true; } void SelectionDAGBuilder::visitShuffleVector(const User &I) { - SmallVector<int, 8> Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); - // Convert the ConstantVector mask operand into an array of ints, with -1 - // representing undef values. - SmallVector<Constant*, 8> MaskElts; - cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts); - unsigned MaskNumElts = MaskElts.size(); - for (unsigned i = 0; i != MaskNumElts; ++i) { - if (isa<UndefValue>(MaskElts[i])) - Mask.push_back(-1); - else - Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue()); - } - + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); + unsigned MaskNumElts = Mask.size(); + EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -2820,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors // lengths match. - if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), - VT, Src1, Src2)); - return; + if (SrcNumElts*2 == MaskNumElts) { + // First check for Src1 in low and Src2 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { + // The shuffle is concatenating two vectors together.
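// (Worked example, assuming SrcNumElts == 4: the mask <0,1,2,3,4,5,6,7>
//  passes this pair of checks and becomes CONCAT_VECTORS(Src1, Src2),
//  while <4,5,6,7,-1,1,2,3> fails here but passes the swapped checks
//  below and becomes CONCAT_VECTORS(Src2, Src1); the -1 entry is undef
//  and matches any expected value.)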
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src1, Src2)); + return; + } + // Then check for Src2 in low and Src1 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + VT, Src2, Src1)); + return; + } } // Pad both vectors with undefs to make them the same length as the mask. @@ -3048,7 +3077,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(I.getOperand(0)); - Type *Ty = I.getOperand(0)->getType(); + // Note that the pointer operand may be a vector of pointers. Take the scalar + // element which holds a pointer. + Type *Ty = I.getOperand(0)->getType()->getScalarType(); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3365,7 +3396,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { DebugLoc dl = getCurDebugLoc(); ISD::NodeType NT; switch (I.getOperation()) { - default: llvm_unreachable("Unknown atomicrmw operation"); return; + default: llvm_unreachable("Unknown atomicrmw operation"); case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; @@ -3503,24 +3534,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { SDValue Op = getValue(I.getArgOperand(i)); - assert(TLI.isTypeLegal(Op.getValueType()) && - "Intrinsic uses a non-legal type?"); Ops.push_back(Op); } SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); -#ifndef NDEBUG - for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) { - assert(TLI.isTypeLegal(ValueVTs[Val]) && - "Intrinsic uses a non-legal type?"); - } -#endif // NDEBUG if (HasChain) ValueVTs.push_back(MVT::Other); @@ -4480,9 +4503,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return "_setjmp"+!TLI.usesUnderscoreSetJmp(); + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return "_longjmp"+!TLI.usesUnderscoreLongJmp(); + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4550,7 +4573,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check if address has undef value. if (isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } @@ -4560,11 +4583,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { N = UnusedArgNodeMap[Address]; SDDbgValue *SDV; if (N.getNode()) { - // Parameters are handled specially. 
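// On the &"_setjmp"[!TLI.usesUnderscoreSetJmp()] form above: indexing into
// the string literal keeps the result a const char*. Index 0 yields
// "_setjmp"; index 1 skips the leading underscore and yields "setjmp". The
// old "_setjmp"+N spelling computed the same pointer but is easy to misread
// as integer addition. A standalone sketch (jmpName is a hypothetical name):
static const char *jmpName(bool UsesUnderscore) {
  return &"_setjmp"[!UsesUnderscore]; // skip '_' when the target doesn't use it
}
// jmpName(true) points at "_setjmp"; jmpName(false) points at "setjmp".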
- bool isParameter = - DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable; if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); + // Parameters are handled specially. + bool isParameter = + (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || + isa<Argument>(Address)); + const AllocaInst *AI = dyn_cast<AllocaInst>(Address); if (isParameter && !AI) { @@ -4584,7 +4609,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); + DEBUG(Address->dump()); return 0; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); @@ -4606,7 +4633,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } } - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return 0; @@ -4652,7 +4679,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // We may expand this to cover more cases. One case where we have no // data available is an unreferenced parameter. - DEBUG(dbgs() << "Dropping debug info for " << DI); + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } @@ -4674,43 +4701,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc()); return 0; } - case Intrinsic::eh_exception: { - // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[1]; - Ops[0] = DAG.getRoot(); - SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1); - setValue(&I, Op); - DAG.setRoot(Op.getValue(1)); - return 0; - } - - case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBB; - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (CallMBB->isLandingPad()) - AddCatchInfo(I, &MMI, CallMBB); - else { -#ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(&I); -#endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - - // Insert the EHSELECTION instruction. - SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); - SDValue Ops[2]; - Ops[0] = getValue(I.getArgOperand(0)); - Ops[1] = getRoot(); - SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); - DAG.setRoot(Op.getValue(1)); - setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32)); - return 0; - } case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. 
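// (With the eh.exception / eh.selector intrinsic cases deleted above, the
//  landing-pad values are produced when the landingpad instruction itself is
//  visited. A sketch of the usual CopyFromReg pattern, stated as an
//  assumption since the full visitLandingPad body is not part of this hunk:
//    unsigned PtrReg = TLI.getExceptionPointerRegister();
//    unsigned SelReg = TLI.getExceptionSelectorRegister();
//    SDValue Ex  = DAG.getCopyFromReg(DAG.getEntryNode(), dl, PtrReg,
//                                     TLI.getPointerTy());
//    SDValue Sel = DAG.getCopyFromReg(Ex.getValue(1), dl, SelReg,
//                                     TLI.getPointerTy()); )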
@@ -4843,6 +4833,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } + case Intrinsic::x86_avx_vinsertf128_pd_256: + case Intrinsic::x86_avx_vinsertf128_ps_256: + case Intrinsic::x86_avx_vinsertf128_si_256: { + DebugLoc dl = getCurDebugLoc(); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * + ElVT.getVectorNumElements(); + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, MVT::i32)); + setValue(&I, Res); + return 0; + } case Intrinsic::convertff: case Intrinsic::convertfsi: case Intrinsic::convertfui: @@ -5058,7 +5063,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); - return 0; case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; @@ -5079,7 +5083,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(getRoot(), I.getType(), false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, /*isReturnValueUsed=*/true, + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), Args, DAG, getCurDebugLoc()); DAG.setRoot(Result.second); @@ -5242,6 +5247,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(), CS.getCallingConv(), isTailCall, + CS.doesNotReturn(), !CS.getInstruction()->use_empty(), Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && @@ -5477,23 +5483,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - // See if any floating point values are being passed to this function. This is - // used to emit an undefined reference to fltused on Windows. - FunctionType *FT = - cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0)); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - if (FT->isVarArg() && - !MMI.callsExternalVAFunctionWithFloatingPointArguments()) { - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - Type* T = I.getArgOperand(i)->getType(); - for (po_iterator<Type*> i = po_begin(T), e = po_end(T); - i != e; ++i) { - if (!i->isFloatingPointTy()) continue; - MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true); - break; - } - } - } + ComputeUsesVAFloatArgument(I, &MMI); const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { @@ -5636,7 +5627,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { (LibInfo->has(LibFunc::log2l) && Name == "log2l")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(), Tmp.getValueType(), Tmp)); @@ -5647,7 +5639,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) { if (I.getNumArgOperands() == 1 && // Basic sanity checks. 
I.getArgOperand(0)->getType()->isFloatingPointTy() && - I.getType() == I.getArgOperand(0)->getType()) { + I.getType() == I.getArgOperand(0)->getType() && + I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(), Tmp.getValueType(), Tmp)); @@ -5690,22 +5683,6 @@ public: : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { } - /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers - /// busy in OutputRegs/InputRegs. - void MarkAllocatedRegs(bool isOutReg, bool isInReg, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs, - const TargetRegisterInfo &TRI) const { - if (isOutReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI); - } - if (isInReg) { - for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i) - MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI); - } - } - /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. If there is no Value* for this operand, it returns /// MVT::Other. @@ -5753,18 +5730,6 @@ public: return TLI.getValueType(OpTy, true); } - -private: - /// MarkRegAndAliases - Mark the specified register and all aliases in the - /// specified set. - static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs, - const TargetRegisterInfo &TRI) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg"); - Regs.insert(Reg); - if (const unsigned *Aliases = TRI.getAliasSet(Reg)) - for (; *Aliases; ++Aliases) - Regs.insert(*Aliases); - } }; typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; @@ -5778,39 +5743,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. -/// Input and OutputRegs are the set of already allocated physical registers. /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, DebugLoc DL, - SDISelAsmOperandInfo &OpInfo, - std::set<unsigned> &OutputRegs, - std::set<unsigned> &InputRegs) { + SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); - // Compute whether this value requires an input register, an output register, - // or both. - bool isOutReg = false; - bool isInReg = false; - switch (OpInfo.Type) { - case InlineAsm::isOutput: - isOutReg = true; - - // If there is an input constraint that matches this, we need to reserve - // the input register so no other inputs allocate to it. - isInReg = OpInfo.hasMatchingInput(); - break; - case InlineAsm::isInput: - isInReg = true; - isOutReg = false; - break; - case InlineAsm::isClobber: - isOutReg = true; - isInReg = true; - break; - } - - MachineFunction &MF = DAG.getMachineFunction(); SmallVector<unsigned, 4> Regs; @@ -5884,8 +5823,6 @@ static void GetRegistersForValue(SelectionDAG &DAG, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); - OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; } @@ -5916,8 +5853,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. 
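// (On the I.onlyReadsMemory() guards added to the log2*/exp2* cases above:
//  a call such as exp2f is folded to ISD::FEXP2 only when the call site is
//  readonly/readnone, so a build that interposes a side-effecting exp2f
//  keeps the real libcall instead of the DAG node.)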
SDISelAsmOperandInfoVector ConstraintOperands; - std::set<unsigned> OutputRegs, InputRegs; - TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS); @@ -6050,7 +5985,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // constant pool entry to get its address. const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || - isa<ConstantVector>(OpVal)) { + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), TLI.getPointerTy()); } else { @@ -6079,8 +6014,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -6091,8 +6025,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, - InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6146,9 +6079,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the output from the appropriate register. Find a register that // we can use. - if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate output reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // If this is an indirect operand, store through the pointer after the // asm. @@ -6248,9 +6185,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); - if (Ops.empty()) - report_fatal_error("Invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (Ops.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = @@ -6281,9 +6222,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { "Don't know how to handle indirect register inputs yet!"); // Copy the input into the appropriate registers. 
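// (The allocation-failure path below follows the same recoverable-diagnostic
//  pattern as the output and constraint cases earlier: LLVMContext::emitError
//  attaches the message to the offending call instruction and lets codegen of
//  the rest of the function continue, where the old report_fatal_error
//  aborted the whole process. Minimal shape, same assumptions as the
//  surrounding code:
//    LLVMContext &Ctx = *DAG.getContext();
//    Ctx.emitError(CS.getInstruction(), "couldn't allocate input reg");
//    break; )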
- if (OpInfo.AssignedRegs.Regs.empty()) - report_fatal_error("Couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'!"); + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + break; + } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), Chain, &Flag); @@ -6421,7 +6366,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg, unsigned NumFixedArgs, CallingConv::ID CallConv, bool isTailCall, - bool isReturnValueUsed, + bool doesNotRet, bool isReturnValueUsed, SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) const { @@ -6518,7 +6463,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy, } SmallVector<SDValue, 4> InVals; - Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, + Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. @@ -6587,7 +6532,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N, SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("LowerOperation not implemented for this target!"); - return SDValue(); } void diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 5147b6c..8cf88e1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -72,7 +72,6 @@ class TargetLowering; class TruncInst; class UIToFPInst; class UnreachableInst; -class UnwindInst; class VAArgInst; class ZExtInst; @@ -130,13 +129,13 @@ private: /// Case - A struct to record the Value for a switch case, and the /// case's target basic block. struct Case { - Constant* Low; - Constant* High; + const Constant *Low; + const Constant *High; MachineBasicBlock* BB; uint32_t ExtraWeight; Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } - Case(Constant* low, Constant* high, MachineBasicBlock* bb, + Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -454,7 +453,8 @@ private: MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); - uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst); + uint32_t getEdgeWeight(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, uint32_t Weight = 0); public: @@ -474,7 +474,6 @@ private: // These all get lowered before this pass. 
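// (Plumbing summary for the doesNotRet flag added above: callers pass
//  CS.doesNotReturn() into TargetLowering::LowerCallTo, which forwards it to
//  the target's LowerCall hook, so a backend can treat noreturn calls
//  specially. Sketch of a call site, with unrelated arguments abbreviated:
//    TLI.LowerCallTo(Chain, RetTy, ..., /*isTailCall=*/false,
//                    /*doesNotRet=*/CS.doesNotReturn(),
//                    /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); )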
void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); - void visitUnwind(const UnwindInst &I); void visitBinary(const User &I, unsigned OpCode); void visitShift(const User &I, unsigned Opcode); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3c95059..2173d8d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -74,7 +74,6 @@ STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch"); STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr"); STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke"); STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume"); -STATISTIC(NumFastIselFailUnwind,"Fast isel fails on Unwind"); STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable"); // Standard binary operators... @@ -218,12 +217,15 @@ namespace llvm { CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || + TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); assert(TLI.getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); @@ -248,7 +250,6 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "TargetLowering::EmitInstrWithCustomInserter!"; #endif llvm_unreachable(0); - return 0; } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, @@ -262,6 +263,8 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// +void SelectionDAGISel::ISelUpdater::anchor() { } + SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), @@ -452,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } // Determine if there is a call to setjmp in the machine function. - MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice()); + MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); // Replace forward-declared registers with the registers containing // the desired value. @@ -777,37 +780,12 @@ void SelectionDAGISel::PrepareEHLandingPad() { .addSym(Label); // Mark exception register as live in. - unsigned Reg = TLI.getExceptionAddressRegister(); + unsigned Reg = TLI.getExceptionPointerRegister(); if (Reg) MBB->addLiveIn(Reg); // Mark exception selector register as live in. Reg = TLI.getExceptionSelectorRegister(); if (Reg) MBB->addLiveIn(Reg); - - // FIXME: Hack around an exception handling flaw (PR1508): the personality - // function and list of typeids logically belong to the invoke (or, if you - // like, the basic block containing the invoke), and need to be associated - // with it in the dwarf exception handling tables. 
Currently however the - // information is provided by an intrinsic (eh.selector) that can be moved - // to unexpected places by the optimizers: if the unwind edge is critical, - // then breaking it can result in the intrinsics being in the successor of - // the landing pad, not the landing pad itself. This results - // in exceptions not being caught because no typeids are associated with - // the invoke. This may not be the only way things can go wrong, but it - // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator()); - - if (Br && Br->isUnconditional()) { // Critical edge? - BasicBlock::const_iterator I, E; - for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I) - if (isa<EHSelectorInst>(I)) - break; - - if (I == E) - // No catch info found - try to extract some from the successor. - CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo); - } } /// TryToFoldFastISelLoad - We're checking to see if we can fold the specified @@ -901,6 +879,10 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, } #ifndef NDEBUG +// Collect per Instruction statistics for fast-isel misses. Only those +// instructions that cause the bail are accounted for. It does not account for +// instructions higher in the block. Thus, summing the per instructions stats +// will not add up to what is reported by NumFastIselFailures. static void collectFailStats(const Instruction *I) { switch (I->getOpcode()) { default: assert (0 && "<Invalid operator> "); @@ -912,7 +894,6 @@ static void collectFailStats(const Instruction *I) { case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; case Instruction::Invoke: NumFastIselFailInvoke++; return; case Instruction::Resume: NumFastIselFailResume++; return; - case Instruction::Unwind: NumFastIselFailUnwind++; return; case Instruction::Unreachable: NumFastIselFailUnreachable++; return; // Standard binary operators... @@ -974,7 +955,6 @@ static void collectFailStats(const Instruction *I) { case Instruction::InsertValue: NumFastIselFailInsertValue++; return; case Instruction::LandingPad: NumFastIselFailLandingPad++; return; } - return; } #endif @@ -2199,6 +2179,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::EntryToken: // These nodes remain the same. case ISD::BasicBlock: case ISD::Register: + case ISD::RegisterMask: //case ISD::VALUETYPE: //case ISD::CONDCODE: case ISD::HANDLENODE: diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index a7cf089..6cde05a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/config.h" using namespace llvm; namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9ced1ac..792de75 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -572,21 +572,22 @@ TargetLowering::TargetLowering(const TargetMachine &tm, // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants. 
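// (The f16 entries added below extend the same default-Expand policy to half
//  precision, alongside the existing f32/f64/f80 rows; a target with native
//  half-precision constants would override selectively in its own
//  TargetLowering constructor, e.g.:
//    setOperationAction(ISD::ConstantFP, MVT::f16, Legal); )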
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand); setOperationAction(ISD::ConstantFP, MVT::f32, Expand); setOperationAction(ISD::ConstantFP, MVT::f64, Expand); setOperationAction(ISD::ConstantFP, MVT::f80, Expand); // These library functions default to expand. - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); setOperationAction(ISD::FLOG , MVT::f32, Expand); setOperationAction(ISD::FLOG2, MVT::f32, Expand); setOperationAction(ISD::FLOG10, MVT::f32, Expand); @@ -597,6 +598,16 @@ TargetLowering::TargetLowering(const TargetMachine &tm, setOperationAction(ISD::FCEIL, MVT::f32, Expand); setOperationAction(ISD::FRINT, MVT::f32, Expand); setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); // Default ISD::TRAP to expand (which turns it into abort). setOperationAction(ISD::TRAP, MVT::Other, Expand); @@ -1597,23 +1608,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; case ISD::SIGN_EXTEND_INREG: { - EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == DemandedMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, ShiftAmt)); + } // Sign extension. Compute the demanded bits in the result that are not // present in the input. 
APInt NewBits = APInt::getHighBitsSet(BitWidth, - BitWidth - EVT.getScalarType().getSizeInBits()); + BitWidth - ExVT.getScalarType().getSizeInBits()); // If none of the extended bits are demanded, eliminate the sextinreg. if ((NewBits & NewMask) == 0) return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, - EVT.getScalarType().getSizeInBits()) & + ExVT.getScalarType().getSizeInBits()) & NewMask; // Since the sign extended bits are demanded, we know that the sign @@ -1631,7 +1659,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the input sign bit is known zero, convert this into a zero extension. if (KnownZero.intersects(InSignBit)) return TLO.CombineTo(Op, - TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); if (KnownOne.intersects(InSignBit)) { // Input sign bit known set KnownOne |= NewBits; @@ -2995,7 +3023,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( /// is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { - default: llvm_unreachable("Unknown constraint type!"); case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -3006,6 +3033,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_Memory: return 3; } + llvm_unreachable("Invalid constraint type"); } /// Examine constraint type and operand type and determine a weight value.
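// A worked instance of the SIGN_EXTEND_INREG change above, as a sketch: with
// BitWidth == 32, ExVT == i8, and DemandedMask demanding only bit 31, the
// result's sign bit is just bit 7 of the input, so the node is rewritten as
// a single shift and the usual shl+sra pair of a full sign extension is
// avoided:
//   // sign_ext_inreg X, i8  ->  shl X, 24   (only bit 31 demanded)
//   SDValue ShiftAmt = TLO.DAG.getConstant(32 - 8, ShiftAmtTy);
//   return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, MVT::i32,
//                                            InOp, ShiftAmt));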