Diffstat (limited to 'lib')
389 files changed, 24366 insertions, 13935 deletions
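Much of the mechanical churn across these files is a migration from the isa<PointerType>(...) / isFloatingPoint() spellings to the newer Type::isPointerTy() / isFloatingPointTy() predicates, visible throughout the hunks below. A minimal sketch (not part of the commit; it assumes the LLVM headers of this tree, and the helper names are made up for illustration) showing that the two spellings are interchangeable:

// Illustrative sketch only: compares the old and new Type predicate
// spellings that the hunks below migrate between.
#include "llvm/Value.h"
#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Pre-change spelling: query the type's dynamic class directly.
static bool isPointerValueOld(const Value *V) {
  return isa<PointerType>(V->getType());
}

// Post-change spelling: use the dedicated predicate on Type.
static bool isPointerValueNew(const Value *V) {
  return V->getType()->isPointerTy();
}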
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 761cd46..1053955 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -162,7 +162,7 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
     errs() << MRString << ": Ptr: ";
     errs() << "[" << Size << "B] ";
     WriteAsOperand(errs(), P, true, M);
-    errs() << "\t<->" << *CS.getInstruction();
+    errs() << "\t<->" << *CS.getInstruction() << '\n';
   }
   return R;
 }
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 6b0a956..308b9e3 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -115,11 +115,11 @@ bool AAEval::runOnFunction(Function &F) {
   SetVector<CallSite> CallSites;

   for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
-    if (isa<PointerType>(I->getType()))    // Add all pointer arguments
+    if (I->getType()->isPointerTy())    // Add all pointer arguments
       Pointers.insert(I);

   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
-    if (isa<PointerType>(I->getType())) // Add all pointer instructions
+    if (I->getType()->isPointerTy()) // Add all pointer instructions
       Pointers.insert(&*I);
     Instruction &Inst = *I;
     User::op_iterator OI = Inst.op_begin();
@@ -128,7 +128,7 @@ bool AAEval::runOnFunction(Function &F) {
         isa<Function>(CS.getCalledValue()))
       ++OI;  // Skip actual functions for direct function calls.
     for (; OI != Inst.op_end(); ++OI)
-      if (isa<PointerType>((*OI)->getType()) && !isa<ConstantPointerNull>(*OI))
+      if ((*OI)->getType()->isPointerTy() && !isa<ConstantPointerNull>(*OI))
         Pointers.insert(*OI);

     if (CS.getInstruction()) CallSites.insert(CS);
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
new file mode 100644
index 0000000..1d038f2
--- /dev/null
+++ b/lib/Analysis/Android.mk
@@ -0,0 +1,69 @@
+LOCAL_PATH:= $(call my-dir)
+
+analysis_SRC_FILES := \
+  AliasAnalysis.cpp \
+  AliasAnalysisCounter.cpp \
+  AliasAnalysisEvaluator.cpp \
+  AliasDebugger.cpp \
+  AliasSetTracker.cpp \
+  Analysis.cpp \
+  BasicAliasAnalysis.cpp \
+  CFGPrinter.cpp \
+  CaptureTracking.cpp \
+  ConstantFolding.cpp \
+  DbgInfoPrinter.cpp \
+  DebugInfo.cpp \
+  DomPrinter.cpp \
+  IVUsers.cpp \
+  InlineCost.cpp \
+  InstCount.cpp \
+  InstructionSimplify.cpp \
+  Interval.cpp \
+  IntervalPartition.cpp \
+  LazyValueInfo.cpp \
+  LibCallAliasAnalysis.cpp \
+  LibCallSemantics.cpp \
+  LiveValues.cpp \
+  MemoryBuiltins.cpp \
+  MemoryDependenceAnalysis.cpp \
+  LoopDependenceAnalysis.cpp \
+  LoopInfo.cpp \
+  LoopPass.cpp \
+  PHITransAddr.cpp \
+  PointerTracking.cpp \
+  PostDominators.cpp \
+  ProfileEstimatorPass.cpp \
+  ProfileInfo.cpp \
+  ProfileInfoLoader.cpp \
+  ProfileInfoLoaderPass.cpp \
+  ProfileVerifierPass.cpp \
+  ScalarEvolution.cpp \
+  ScalarEvolutionAliasAnalysis.cpp \
+  ScalarEvolutionExpander.cpp \
+  SparsePropagation.cpp \
+  Trace.cpp \
+  ValueTracking.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMAnalysis
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMAnalysis
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
diff --git
a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 36b831c..31a649d 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -290,7 +290,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI, ++ArgNo) { // Only look at the no-capture pointer arguments. - if (!isa<PointerType>((*CI)->getType()) || + if (!(*CI)->getType()->isPointerTy() || !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)) continue; @@ -662,7 +662,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, // Are we checking for alias of the same value? if (V1 == V2) return MustAlias; - if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType())) + if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) return NoAlias; // Scalars cannot alias each other // Figure out what objects these things are pointing to if we can. diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 10a8b11..8767c18 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -44,7 +44,7 @@ static int const Threshold = 20; /// counts as capturing it or not. bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { - assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!"); + assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); SmallVector<Use*, Threshold> Worklist; SmallSet<Use*, Threshold> Visited; int Count = 0; diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index ba87040..114db2d 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -80,7 +80,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // First thing is first. We only want to think about integer here, so if // we have something in FP form, recast it as integer. - if (DstEltTy->isFloatingPoint()) { + if (DstEltTy->isFloatingPointTy()) { // Fold to an vector of integers with same size as our FP type. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); const Type *DestIVTy = @@ -95,7 +95,7 @@ static Constant *FoldBitCast(Constant *C, const Type *DestTy, // Okay, we know the destination is integer, if the input is FP, convert // it to integer first. - if (SrcEltTy->isFloatingPoint()) { + if (SrcEltTy->isFloatingPointTy()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); const Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); @@ -359,7 +359,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) MapTy = Type::getInt64PtrTy(C->getContext()); - else if (isa<VectorType>(LoadTy)) { + else if (LoadTy->isVectorTy()) { MapTy = IntegerType::get(C->getContext(), TD.getTypeAllocSizeInBits(LoadTy)); MapTy = PointerType::getUnqual(MapTy); @@ -605,7 +605,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, SmallVector<Constant*, 32> NewIdxs; do { if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) { - if (isa<PointerType>(ATy)) { + if (ATy->isPointerTy()) { // The only pointer indexing we'll do is on the first index of the GEP. 
if (!NewIdxs.empty()) break; @@ -783,45 +783,12 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if // the int size is >= the ptr size. This requires knowing the width of a // pointer, so it can't be done in ConstantExpr::getCast. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) if (TD && - TD->getPointerSizeInBits() <= - CE->getType()->getScalarSizeInBits()) { - if (CE->getOpcode() == Instruction::PtrToInt) - return FoldBitCast(CE->getOperand(0), DestTy, *TD); - - // If there's a constant offset added to the integer value before - // it is casted back to a pointer, see if the expression can be - // converted into a GEP. - if (CE->getOpcode() == Instruction::Add) - if (ConstantInt *L = dyn_cast<ConstantInt>(CE->getOperand(0))) - if (ConstantExpr *R = dyn_cast<ConstantExpr>(CE->getOperand(1))) - if (R->getOpcode() == Instruction::PtrToInt) - if (GlobalVariable *GV = - dyn_cast<GlobalVariable>(R->getOperand(0))) { - const PointerType *GVTy = cast<PointerType>(GV->getType()); - if (const ArrayType *AT = - dyn_cast<ArrayType>(GVTy->getElementType())) { - const Type *ElTy = AT->getElementType(); - uint64_t AllocSize = TD->getTypeAllocSize(ElTy); - APInt PSA(L->getValue().getBitWidth(), AllocSize); - if (ElTy == cast<PointerType>(DestTy)->getElementType() && - L->getValue().urem(PSA) == 0) { - APInt ElemIdx = L->getValue().udiv(PSA); - if (ElemIdx.ult(APInt(ElemIdx.getBitWidth(), - AT->getNumElements()))) { - Constant *Index[] = { - Constant::getNullValue(CE->getType()), - ConstantInt::get(ElTy->getContext(), ElemIdx) - }; - return - ConstantExpr::getGetElementPtr(GV, &Index[0], 2); - } - } - } - } - } - } + TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() && + CE->getOpcode() == Instruction::PtrToInt) + return FoldBitCast(CE->getOperand(0), DestTy, *TD); + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::Trunc: case Instruction::ZExt: @@ -1179,6 +1146,12 @@ llvm::ConstantFoldCall(Function *F, return 0; } + if (isa<UndefValue>(Operands[0])) { + if (Name.startswith("llvm.bswap")) + return Operands[0]; + return 0; + } + return 0; } diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 258f1db..5cfe666 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -1007,12 +1007,15 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context, /// CreateBlock - This creates a descriptor for a lexical block with the /// specified parent VMContext. 
-DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context) { +DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context, + unsigned LineNo, unsigned Col) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_lexical_block), - Context.getNode() + Context.getNode(), + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), Col) }; - return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2)); + return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 4)); } /// CreateNameSpace - This creates new descriptor for a namespace diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp deleted file mode 100644 index 4180206..0000000 --- a/lib/Analysis/IPA/Andersens.cpp +++ /dev/null @@ -1,2868 +0,0 @@ -//===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an implementation of Andersen's interprocedural alias -// analysis -// -// In pointer analysis terms, this is a subset-based, flow-insensitive, -// field-sensitive, and context-insensitive algorithm pointer algorithm. -// -// This algorithm is implemented as three stages: -// 1. Object identification. -// 2. Inclusion constraint identification. -// 3. Offline constraint graph optimization -// 4. Inclusion constraint solving. -// -// The object identification stage identifies all of the memory objects in the -// program, which includes globals, heap allocated objects, and stack allocated -// objects. -// -// The inclusion constraint identification stage finds all inclusion constraints -// in the program by scanning the program, looking for pointer assignments and -// other statements that effect the points-to graph. For a statement like "A = -// B", this statement is processed to indicate that A can point to anything that -// B can point to. Constraints can handle copies, loads, and stores, and -// address taking. -// -// The offline constraint graph optimization portion includes offline variable -// substitution algorithms intended to compute pointer and location -// equivalences. Pointer equivalences are those pointers that will have the -// same points-to sets, and location equivalences are those variables that -// always appear together in points-to sets. It also includes an offline -// cycle detection algorithm that allows cycles to be collapsed sooner -// during solving. -// -// The inclusion constraint solving phase iteratively propagates the inclusion -// constraints until a fixed point is reached. This is an O(N^3) algorithm. -// -// Function constraints are handled as if they were structs with X fields. -// Thus, an access to argument X of function Y is an access to node index -// getNode(Y) + X. This representation allows handling of indirect calls -// without any issues. To wit, an indirect call Y(a,b) is equivalent to -// *(Y + 1) = a, *(Y + 2) = b. -// The return node for a function is always located at getNode(F) + -// CallReturnPos. The arguments start at getNode(F) + CallArgPos. -// -// Future Improvements: -// Use of BDD's. 
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "anders-aa" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstIterator.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Support/Debug.h" -#include "llvm/System/Atomic.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/DenseSet.h" -#include <algorithm> -#include <set> -#include <list> -#include <map> -#include <stack> -#include <vector> -#include <queue> - -// Determining the actual set of nodes the universal set can consist of is very -// expensive because it means propagating around very large sets. We rely on -// other analysis being able to determine which nodes can never be pointed to in -// order to disambiguate further than "points-to anything". -#define FULL_UNIVERSAL 0 - -using namespace llvm; -#ifndef NDEBUG -STATISTIC(NumIters , "Number of iterations to reach convergence"); -#endif -STATISTIC(NumConstraints, "Number of constraints"); -STATISTIC(NumNodes , "Number of nodes"); -STATISTIC(NumUnified , "Number of variables unified"); -STATISTIC(NumErased , "Number of redundant constraints erased"); - -static const unsigned SelfRep = (unsigned)-1; -static const unsigned Unvisited = (unsigned)-1; -// Position of the function return node relative to the function node. -static const unsigned CallReturnPos = 1; -// Position of the function call node relative to the function node. -static const unsigned CallFirstArgPos = 2; - -namespace { - struct BitmapKeyInfo { - static inline SparseBitVector<> *getEmptyKey() { - return reinterpret_cast<SparseBitVector<> *>(-1); - } - static inline SparseBitVector<> *getTombstoneKey() { - return reinterpret_cast<SparseBitVector<> *>(-2); - } - static unsigned getHashValue(const SparseBitVector<> *bitmap) { - return bitmap->getHashValue(); - } - static bool isEqual(const SparseBitVector<> *LHS, - const SparseBitVector<> *RHS) { - if (LHS == RHS) - return true; - else if (LHS == getEmptyKey() || RHS == getEmptyKey() - || LHS == getTombstoneKey() || RHS == getTombstoneKey()) - return false; - - return *LHS == *RHS; - } - }; - - class Andersens : public ModulePass, public AliasAnalysis, - private InstVisitor<Andersens> { - struct Node; - - /// Constraint - Objects of this structure are used to represent the various - /// constraints identified by the algorithm. The constraints are 'copy', - /// for statements like "A = B", 'load' for statements like "A = *B", - /// 'store' for statements like "*A = B", and AddressOf for statements like - /// A = alloca; The Offset is applied as *(A + K) = B for stores, - /// A = *(B + K) for loads, and A = B + K for copies. 
It is - /// illegal on addressof constraints (because it is statically - /// resolvable to A = &C where C = B + K) - - struct Constraint { - enum ConstraintType { Copy, Load, Store, AddressOf } Type; - unsigned Dest; - unsigned Src; - unsigned Offset; - - Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0) - : Type(Ty), Dest(D), Src(S), Offset(O) { - assert((Offset == 0 || Ty != AddressOf) && - "Offset is illegal on addressof constraints"); - } - - bool operator==(const Constraint &RHS) const { - return RHS.Type == Type - && RHS.Dest == Dest - && RHS.Src == Src - && RHS.Offset == Offset; - } - - bool operator!=(const Constraint &RHS) const { - return !(*this == RHS); - } - - bool operator<(const Constraint &RHS) const { - if (RHS.Type != Type) - return RHS.Type < Type; - else if (RHS.Dest != Dest) - return RHS.Dest < Dest; - else if (RHS.Src != Src) - return RHS.Src < Src; - return RHS.Offset < Offset; - } - }; - - // Information DenseSet requires implemented in order to be able to do - // it's thing - struct PairKeyInfo { - static inline std::pair<unsigned, unsigned> getEmptyKey() { - return std::make_pair(~0U, ~0U); - } - static inline std::pair<unsigned, unsigned> getTombstoneKey() { - return std::make_pair(~0U - 1, ~0U - 1); - } - static unsigned getHashValue(const std::pair<unsigned, unsigned> &P) { - return P.first ^ P.second; - } - static unsigned isEqual(const std::pair<unsigned, unsigned> &LHS, - const std::pair<unsigned, unsigned> &RHS) { - return LHS == RHS; - } - }; - - struct ConstraintKeyInfo { - static inline Constraint getEmptyKey() { - return Constraint(Constraint::Copy, ~0U, ~0U, ~0U); - } - static inline Constraint getTombstoneKey() { - return Constraint(Constraint::Copy, ~0U - 1, ~0U - 1, ~0U - 1); - } - static unsigned getHashValue(const Constraint &C) { - return C.Src ^ C.Dest ^ C.Type ^ C.Offset; - } - static bool isEqual(const Constraint &LHS, - const Constraint &RHS) { - return LHS.Type == RHS.Type && LHS.Dest == RHS.Dest - && LHS.Src == RHS.Src && LHS.Offset == RHS.Offset; - } - }; - - // Node class - This class is used to represent a node in the constraint - // graph. Due to various optimizations, it is not always the case that - // there is a mapping from a Node to a Value. In particular, we add - // artificial Node's that represent the set of pointed-to variables shared - // for each location equivalent Node. - struct Node { - private: - static volatile sys::cas_flag Counter; - - public: - Value *Val; - SparseBitVector<> *Edges; - SparseBitVector<> *PointsTo; - SparseBitVector<> *OldPointsTo; - std::list<Constraint> Constraints; - - // Pointer and location equivalence labels - unsigned PointerEquivLabel; - unsigned LocationEquivLabel; - // Predecessor edges, both real and implicit - SparseBitVector<> *PredEdges; - SparseBitVector<> *ImplicitPredEdges; - // Set of nodes that point to us, only use for location equivalence. - SparseBitVector<> *PointedToBy; - // Number of incoming edges, used during variable substitution to early - // free the points-to sets - unsigned NumInEdges; - // True if our points-to set is in the Set2PEClass map - bool StoredInHash; - // True if our node has no indirect constraints (complex or otherwise) - bool Direct; - // True if the node is address taken, *or* it is part of a group of nodes - // that must be kept together. This is set to true for functions and - // their arg nodes, which must be kept at the same position relative to - // their base function node. 
- bool AddressTaken; - - // Nodes in cycles (or in equivalence classes) are united together using a - // standard union-find representation with path compression. NodeRep - // gives the index into GraphNodes for the representative Node. - unsigned NodeRep; - - // Modification timestamp. Assigned from Counter. - // Used for work list prioritization. - unsigned Timestamp; - - explicit Node(bool direct = true) : - Val(0), Edges(0), PointsTo(0), OldPointsTo(0), - PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0), - ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0), - StoredInHash(false), Direct(direct), AddressTaken(false), - NodeRep(SelfRep), Timestamp(0) { } - - Node *setValue(Value *V) { - assert(Val == 0 && "Value already set for this node!"); - Val = V; - return this; - } - - /// getValue - Return the LLVM value corresponding to this node. - /// - Value *getValue() const { return Val; } - - /// addPointerTo - Add a pointer to the list of pointees of this node, - /// returning true if this caused a new pointer to be added, or false if - /// we already knew about the points-to relation. - bool addPointerTo(unsigned Node) { - return PointsTo->test_and_set(Node); - } - - /// intersects - Return true if the points-to set of this node intersects - /// with the points-to set of the specified node. - bool intersects(Node *N) const; - - /// intersectsIgnoring - Return true if the points-to set of this node - /// intersects with the points-to set of the specified node on any nodes - /// except for the specified node to ignore. - bool intersectsIgnoring(Node *N, unsigned) const; - - // Timestamp a node (used for work list prioritization) - void Stamp() { - Timestamp = sys::AtomicIncrement(&Counter); - --Timestamp; - } - - bool isRep() const { - return( (int) NodeRep < 0 ); - } - }; - - struct WorkListElement { - Node* node; - unsigned Timestamp; - WorkListElement(Node* n, unsigned t) : node(n), Timestamp(t) {} - - // Note that we reverse the sense of the comparison because we - // actually want to give low timestamps the priority over high, - // whereas priority is typically interpreted as a greater value is - // given high priority. - bool operator<(const WorkListElement& that) const { - return( this->Timestamp > that.Timestamp ); - } - }; - - // Priority-queue based work list specialized for Nodes. - class WorkList { - std::priority_queue<WorkListElement> Q; - - public: - void insert(Node* n) { - Q.push( WorkListElement(n, n->Timestamp) ); - } - - // We automatically discard non-representative nodes and nodes - // that were in the work list twice (we keep a copy of the - // timestamp in the work list so we can detect this situation by - // comparing against the node's current timestamp). - Node* pop() { - while( !Q.empty() ) { - WorkListElement x = Q.top(); Q.pop(); - Node* INode = x.node; - - if( INode->isRep() && - INode->Timestamp == x.Timestamp ) { - return(x.node); - } - } - return(0); - } - - bool empty() { - return Q.empty(); - } - }; - - /// GraphNodes - This vector is populated as part of the object - /// identification stage of the analysis, which populates this vector with a - /// node for each memory object and fills in the ValueNodes map. - std::vector<Node> GraphNodes; - - /// ValueNodes - This map indicates the Node that a particular Value* is - /// represented by. This contains entries for all pointers. - DenseMap<Value*, unsigned> ValueNodes; - - /// ObjectNodes - This map contains entries for each memory object in the - /// program: globals, alloca's and mallocs. 
- DenseMap<Value*, unsigned> ObjectNodes; - - /// ReturnNodes - This map contains an entry for each function in the - /// program that returns a value. - DenseMap<Function*, unsigned> ReturnNodes; - - /// VarargNodes - This map contains the entry used to represent all pointers - /// passed through the varargs portion of a function call for a particular - /// function. An entry is not present in this map for functions that do not - /// take variable arguments. - DenseMap<Function*, unsigned> VarargNodes; - - - /// Constraints - This vector contains a list of all of the constraints - /// identified by the program. - std::vector<Constraint> Constraints; - - // Map from graph node to maximum K value that is allowed (for functions, - // this is equivalent to the number of arguments + CallFirstArgPos) - std::map<unsigned, unsigned> MaxK; - - /// This enum defines the GraphNodes indices that correspond to important - /// fixed sets. - enum { - UniversalSet = 0, - NullPtr = 1, - NullObject = 2, - NumberSpecialNodes - }; - // Stack for Tarjan's - std::stack<unsigned> SCCStack; - // Map from Graph Node to DFS number - std::vector<unsigned> Node2DFS; - // Map from Graph Node to Deleted from graph. - std::vector<bool> Node2Deleted; - // Same as Node Maps, but implemented as std::map because it is faster to - // clear - std::map<unsigned, unsigned> Tarjan2DFS; - std::map<unsigned, bool> Tarjan2Deleted; - // Current DFS number - unsigned DFSNumber; - - // Work lists. - WorkList w1, w2; - WorkList *CurrWL, *NextWL; // "current" and "next" work lists - - // Offline variable substitution related things - - // Temporary rep storage, used because we can't collapse SCC's in the - // predecessor graph by uniting the variables permanently, we can only do so - // for the successor graph. - std::vector<unsigned> VSSCCRep; - // Mapping from node to whether we have visited it during SCC finding yet. - std::vector<bool> Node2Visited; - // During variable substitution, we create unknowns to represent the unknown - // value that is a dereference of a variable. These nodes are known as - // "ref" nodes (since they represent the value of dereferences). - unsigned FirstRefNode; - // During HVN, we create represent address taken nodes as if they were - // unknown (since HVN, unlike HU, does not evaluate unions). - unsigned FirstAdrNode; - // Current pointer equivalence class number - unsigned PEClass; - // Mapping from points-to sets to equivalence classes - typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap; - BitVectorMap Set2PEClass; - // Mapping from pointer equivalences to the representative node. -1 if we - // have no representative node for this pointer equivalence class yet. - std::vector<int> PEClass2Node; - // Mapping from pointer equivalences to representative node. This includes - // pointer equivalent but not location equivalent variables. -1 if we have - // no representative node for this pointer equivalence class yet. - std::vector<int> PENLEClass2Node; - // Union/Find for HCD - std::vector<unsigned> HCDSCCRep; - // HCD's offline-detected cycles; "Statically DeTected" - // -1 if not part of such a cycle, otherwise a representative node. 
- std::vector<int> SDT; - // Whether to use SDT (UniteNodes can use it during solving, but not before) - bool SDTActive; - - public: - static char ID; - Andersens() : ModulePass(&ID) {} - - bool runOnModule(Module &M) { - InitializeAliasAnalysis(this); - IdentifyObjects(M); - CollectConstraints(M); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-constraints" - DEBUG(PrintConstraints()); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" - SolveConstraints(); - DEBUG(PrintPointsToGraph()); - - // Free the constraints list, as we don't need it to respond to alias - // requests. - std::vector<Constraint>().swap(Constraints); - //These are needed for Print() (-analyze in opt) - //ObjectNodes.clear(); - //ReturnNodes.clear(); - //VarargNodes.clear(); - return false; - } - - void releaseMemory() { - // FIXME: Until we have transitively required passes working correctly, - // this cannot be enabled! Otherwise, using -count-aa with the pass - // causes memory to be freed too early. :( -#if 0 - // The memory objects and ValueNodes data structures at the only ones that - // are still live after construction. - std::vector<Node>().swap(GraphNodes); - ValueNodes.clear(); -#endif - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AliasAnalysis::getAnalysisUsage(AU); - AU.setPreservesAll(); // Does not transform code - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) { - if (PI->isPassID(&AliasAnalysis::ID)) - return (AliasAnalysis*)this; - return this; - } - - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size); - virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); - virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2); - bool pointsToConstantMemory(const Value *P); - - virtual void deleteValue(Value *V) { - ValueNodes.erase(V); - getAnalysis<AliasAnalysis>().deleteValue(V); - } - - virtual void copyValue(Value *From, Value *To) { - ValueNodes[To] = ValueNodes[From]; - getAnalysis<AliasAnalysis>().copyValue(From, To); - } - - private: - /// getNode - Return the node corresponding to the specified pointer scalar. - /// - unsigned getNode(Value *V) { - if (Constant *C = dyn_cast<Constant>(V)) - if (!isa<GlobalValue>(C)) - return getNodeForConstantPointer(C); - - DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V); - if (I == ValueNodes.end()) { -#ifndef NDEBUG - V->dump(); -#endif - llvm_unreachable("Value does not have a node in the points-to graph!"); - } - return I->second; - } - - /// getObject - Return the node corresponding to the memory object for the - /// specified global or allocation instruction. - unsigned getObject(Value *V) const { - DenseMap<Value*, unsigned>::const_iterator I = ObjectNodes.find(V); - assert(I != ObjectNodes.end() && - "Value does not have an object in the points-to graph!"); - return I->second; - } - - /// getReturnNode - Return the node representing the return value for the - /// specified function. 
- unsigned getReturnNode(Function *F) const { - DenseMap<Function*, unsigned>::const_iterator I = ReturnNodes.find(F); - assert(I != ReturnNodes.end() && "Function does not return a value!"); - return I->second; - } - - /// getVarargNode - Return the node representing the variable arguments - /// formal for the specified function. - unsigned getVarargNode(Function *F) const { - DenseMap<Function*, unsigned>::const_iterator I = VarargNodes.find(F); - assert(I != VarargNodes.end() && "Function does not take var args!"); - return I->second; - } - - /// getNodeValue - Get the node for the specified LLVM value and set the - /// value for it to be the specified value. - unsigned getNodeValue(Value &V) { - unsigned Index = getNode(&V); - GraphNodes[Index].setValue(&V); - return Index; - } - - unsigned UniteNodes(unsigned First, unsigned Second, - bool UnionByRank = true); - unsigned FindNode(unsigned Node); - unsigned FindNode(unsigned Node) const; - - void IdentifyObjects(Module &M); - void CollectConstraints(Module &M); - bool AnalyzeUsesOfFunction(Value *); - void CreateConstraintGraph(); - void OptimizeConstraints(); - unsigned FindEquivalentNode(unsigned, unsigned); - void ClumpAddressTaken(); - void RewriteConstraints(); - void HU(); - void HVN(); - void HCD(); - void Search(unsigned Node); - void UnitePointerEquivalences(); - void SolveConstraints(); - bool QueryNode(unsigned Node); - void Condense(unsigned Node); - void HUValNum(unsigned Node); - void HVNValNum(unsigned Node); - unsigned getNodeForConstantPointer(Constant *C); - unsigned getNodeForConstantPointerTarget(Constant *C); - void AddGlobalInitializerConstraints(unsigned, Constant *C); - - void AddConstraintsForNonInternalLinkage(Function *F); - void AddConstraintsForCall(CallSite CS, Function *F); - bool AddConstraintsForExternalCall(CallSite CS, Function *F); - - - void PrintNode(const Node *N) const; - void PrintConstraints() const ; - void PrintConstraint(const Constraint &) const; - void PrintLabels() const; - void PrintPointsToGraph() const; - - //===------------------------------------------------------------------===// - // Instruction visitation methods for adding constraints - // - friend class InstVisitor<Andersens>; - void visitReturnInst(ReturnInst &RI); - void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); } - void visitCallInst(CallInst &CI) { - if (isMalloc(&CI)) visitAlloc(CI); - else visitCallSite(CallSite(&CI)); - } - void visitCallSite(CallSite CS); - void visitAllocaInst(AllocaInst &I); - void visitAlloc(Instruction &I); - void visitLoadInst(LoadInst &LI); - void visitStoreInst(StoreInst &SI); - void visitGetElementPtrInst(GetElementPtrInst &GEP); - void visitPHINode(PHINode &PN); - void visitCastInst(CastInst &CI); - void visitICmpInst(ICmpInst &ICI) {} // NOOP! - void visitFCmpInst(FCmpInst &ICI) {} // NOOP! - void visitSelectInst(SelectInst &SI); - void visitVAArg(VAArgInst &I); - void visitInstruction(Instruction &I); - - //===------------------------------------------------------------------===// - // Implement Analyize interface - // - void print(raw_ostream &O, const Module*) const { - PrintPointsToGraph(); - } - }; -} - -char Andersens::ID = 0; -static RegisterPass<Andersens> -X("anders-aa", "Andersen's Interprocedural Alias Analysis (experimental)", - false, true); -static RegisterAnalysisGroup<AliasAnalysis> Y(X); - -// Initialize Timestamp Counter (static). 
-volatile llvm::sys::cas_flag Andersens::Node::Counter = 0; - -ModulePass *llvm::createAndersensPass() { return new Andersens(); } - -//===----------------------------------------------------------------------===// -// AliasAnalysis Interface Implementation -//===----------------------------------------------------------------------===// - -AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { - Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))]; - Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))]; - - // Check to see if the two pointers are known to not alias. They don't alias - // if their points-to sets do not intersect. - if (!N1->intersectsIgnoring(N2, NullObject)) - return NoAlias; - - return AliasAnalysis::alias(V1, V1Size, V2, V2Size); -} - -AliasAnalysis::ModRefResult -Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) { - // The only thing useful that we can contribute for mod/ref information is - // when calling external function calls: if we know that memory never escapes - // from the program, it cannot be modified by an external call. - // - // NOTE: This is not really safe, at least not when the entire program is not - // available. The deal is that the external function could call back into the - // program and modify stuff. We ignore this technical niggle for now. This - // is, after all, a "research quality" implementation of Andersen's analysis. - if (Function *F = CS.getCalledFunction()) - if (F->isDeclaration()) { - Node *N1 = &GraphNodes[FindNode(getNode(P))]; - - if (N1->PointsTo->empty()) - return NoModRef; -#if FULL_UNIVERSAL - if (!UniversalSet->PointsTo->test(FindNode(getNode(P)))) - return NoModRef; // Universal set does not contain P -#else - if (!N1->PointsTo->test(UniversalSet)) - return NoModRef; // P doesn't point to the universal set. -#endif - } - - return AliasAnalysis::getModRefInfo(CS, P, Size); -} - -AliasAnalysis::ModRefResult -Andersens::getModRefInfo(CallSite CS1, CallSite CS2) { - return AliasAnalysis::getModRefInfo(CS1,CS2); -} - -/// pointsToConstantMemory - If we can determine that this pointer only points -/// to constant memory, return true. In practice, this means that if the -/// pointer can only point to constant globals, functions, or the null pointer, -/// return true. -/// -bool Andersens::pointsToConstantMemory(const Value *P) { - Node *N = &GraphNodes[FindNode(getNode(const_cast<Value*>(P)))]; - unsigned i; - - for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); - bi != N->PointsTo->end(); - ++bi) { - i = *bi; - Node *Pointee = &GraphNodes[i]; - if (Value *V = Pointee->getValue()) { - if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) && - !cast<GlobalVariable>(V)->isConstant())) - return AliasAnalysis::pointsToConstantMemory(P); - } else { - if (i != NullObject) - return AliasAnalysis::pointsToConstantMemory(P); - } - } - - return true; -} - -//===----------------------------------------------------------------------===// -// Object Identification Phase -//===----------------------------------------------------------------------===// - -/// IdentifyObjects - This stage scans the program, adding an entry to the -/// GraphNodes list for each memory object in the program (global stack or -/// heap), and populates the ValueNodes and ObjectNodes maps for these objects. 
-/// -void Andersens::IdentifyObjects(Module &M) { - unsigned NumObjects = 0; - - // Object #0 is always the universal set: the object that we don't know - // anything about. - assert(NumObjects == UniversalSet && "Something changed!"); - ++NumObjects; - - // Object #1 always represents the null pointer. - assert(NumObjects == NullPtr && "Something changed!"); - ++NumObjects; - - // Object #2 always represents the null object (the object pointed to by null) - assert(NumObjects == NullObject && "Something changed!"); - ++NumObjects; - - // Add all the globals first. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - ObjectNodes[I] = NumObjects++; - ValueNodes[I] = NumObjects++; - } - - // Add nodes for all of the functions and the instructions inside of them. - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - // The function itself is a memory object. - unsigned First = NumObjects; - ValueNodes[F] = NumObjects++; - if (isa<PointerType>(F->getFunctionType()->getReturnType())) - ReturnNodes[F] = NumObjects++; - if (F->getFunctionType()->isVarArg()) - VarargNodes[F] = NumObjects++; - - - // Add nodes for all of the incoming pointer arguments. - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - { - if (isa<PointerType>(I->getType())) - ValueNodes[I] = NumObjects++; - } - MaxK[First] = NumObjects - First; - - // Scan the function body, creating a memory object for each heap/stack - // allocation in the body of the function and a node to represent all - // pointer values defined by instructions and used as operands. - for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { - // If this is an heap or stack allocation, create a node for the memory - // object. - if (isa<PointerType>(II->getType())) { - ValueNodes[&*II] = NumObjects++; - if (AllocaInst *AI = dyn_cast<AllocaInst>(&*II)) - ObjectNodes[AI] = NumObjects++; - else if (isMalloc(&*II)) - ObjectNodes[&*II] = NumObjects++; - } - - // Calls to inline asm need to be added as well because the callee isn't - // referenced anywhere else. - if (CallInst *CI = dyn_cast<CallInst>(&*II)) { - Value *Callee = CI->getCalledValue(); - if (isa<InlineAsm>(Callee)) - ValueNodes[Callee] = NumObjects++; - } - } - } - - // Now that we know how many objects to create, make them all now! - GraphNodes.resize(NumObjects); - NumNodes += NumObjects; -} - -//===----------------------------------------------------------------------===// -// Constraint Identification Phase -//===----------------------------------------------------------------------===// - -/// getNodeForConstantPointer - Return the node corresponding to the constant -/// pointer itself. 
-unsigned Andersens::getNodeForConstantPointer(Constant *C) { - assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); - - if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C)) - return NullPtr; - else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return getNode(GV); - else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - switch (CE->getOpcode()) { - case Instruction::GetElementPtr: - return getNodeForConstantPointer(CE->getOperand(0)); - case Instruction::IntToPtr: - return UniversalSet; - case Instruction::BitCast: - return getNodeForConstantPointer(CE->getOperand(0)); - default: - errs() << "Constant Expr not yet handled: " << *CE << "\n"; - llvm_unreachable(0); - } - } else { - llvm_unreachable("Unknown constant pointer!"); - } - return 0; -} - -/// getNodeForConstantPointerTarget - Return the node POINTED TO by the -/// specified constant pointer. -unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) { - assert(isa<PointerType>(C->getType()) && "Not a constant pointer!"); - - if (isa<ConstantPointerNull>(C)) - return NullObject; - else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) - return getObject(GV); - else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { - switch (CE->getOpcode()) { - case Instruction::GetElementPtr: - return getNodeForConstantPointerTarget(CE->getOperand(0)); - case Instruction::IntToPtr: - return UniversalSet; - case Instruction::BitCast: - return getNodeForConstantPointerTarget(CE->getOperand(0)); - default: - errs() << "Constant Expr not yet handled: " << *CE << "\n"; - llvm_unreachable(0); - } - } else { - llvm_unreachable("Unknown constant pointer!"); - } - return 0; -} - -/// AddGlobalInitializerConstraints - Add inclusion constraints for the memory -/// object N, which contains values indicated by C. -void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex, - Constant *C) { - if (C->getType()->isSingleValueType()) { - if (isa<PointerType>(C->getType())) - Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, - getNodeForConstantPointer(C))); - } else if (C->isNullValue()) { - Constraints.push_back(Constraint(Constraint::Copy, NodeIndex, - NullObject)); - return; - } else if (!isa<UndefValue>(C)) { - // If this is an array or struct, include constraints for each element. - assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C)); - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - AddGlobalInitializerConstraints(NodeIndex, - cast<Constant>(C->getOperand(i))); - } -} - -/// AddConstraintsForNonInternalLinkage - If this function does not have -/// internal linkage, realize that we can't trust anything passed into or -/// returned by this function. -void Andersens::AddConstraintsForNonInternalLinkage(Function *F) { - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (isa<PointerType>(I->getType())) - // If this is an argument of an externally accessible function, the - // incoming pointer might point to anything. - Constraints.push_back(Constraint(Constraint::Copy, getNode(I), - UniversalSet)); -} - -/// AddConstraintsForCall - If this is a call to a "known" function, add the -/// constraints and return true. If this is a call to an unknown function, -/// return false. -bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) { - assert(F->isDeclaration() && "Not an external function!"); - - // These functions don't induce any points-to constraints. 
- if (F->getName() == "atoi" || F->getName() == "atof" || - F->getName() == "atol" || F->getName() == "atoll" || - F->getName() == "remove" || F->getName() == "unlink" || - F->getName() == "rename" || F->getName() == "memcmp" || - F->getName() == "llvm.memset" || - F->getName() == "strcmp" || F->getName() == "strncmp" || - F->getName() == "execl" || F->getName() == "execlp" || - F->getName() == "execle" || F->getName() == "execv" || - F->getName() == "execvp" || F->getName() == "chmod" || - F->getName() == "puts" || F->getName() == "write" || - F->getName() == "open" || F->getName() == "create" || - F->getName() == "truncate" || F->getName() == "chdir" || - F->getName() == "mkdir" || F->getName() == "rmdir" || - F->getName() == "read" || F->getName() == "pipe" || - F->getName() == "wait" || F->getName() == "time" || - F->getName() == "stat" || F->getName() == "fstat" || - F->getName() == "lstat" || F->getName() == "strtod" || - F->getName() == "strtof" || F->getName() == "strtold" || - F->getName() == "fopen" || F->getName() == "fdopen" || - F->getName() == "freopen" || - F->getName() == "fflush" || F->getName() == "feof" || - F->getName() == "fileno" || F->getName() == "clearerr" || - F->getName() == "rewind" || F->getName() == "ftell" || - F->getName() == "ferror" || F->getName() == "fgetc" || - F->getName() == "fgetc" || F->getName() == "_IO_getc" || - F->getName() == "fwrite" || F->getName() == "fread" || - F->getName() == "fgets" || F->getName() == "ungetc" || - F->getName() == "fputc" || - F->getName() == "fputs" || F->getName() == "putc" || - F->getName() == "ftell" || F->getName() == "rewind" || - F->getName() == "_IO_putc" || F->getName() == "fseek" || - F->getName() == "fgetpos" || F->getName() == "fsetpos" || - F->getName() == "printf" || F->getName() == "fprintf" || - F->getName() == "sprintf" || F->getName() == "vprintf" || - F->getName() == "vfprintf" || F->getName() == "vsprintf" || - F->getName() == "scanf" || F->getName() == "fscanf" || - F->getName() == "sscanf" || F->getName() == "__assert_fail" || - F->getName() == "modf") - return true; - - - // These functions do induce points-to edges. - if (F->getName() == "llvm.memcpy" || - F->getName() == "llvm.memmove" || - F->getName() == "memmove") { - - const FunctionType *FTy = F->getFunctionType(); - if (FTy->getNumParams() > 1 && - isa<PointerType>(FTy->getParamType(0)) && - isa<PointerType>(FTy->getParamType(1))) { - - // *Dest = *Src, which requires an artificial graph node to represent the - // constraint. 
It is broken up into *Dest = temp, temp = *Src - unsigned FirstArg = getNode(CS.getArgument(0)); - unsigned SecondArg = getNode(CS.getArgument(1)); - unsigned TempArg = GraphNodes.size(); - GraphNodes.push_back(Node()); - Constraints.push_back(Constraint(Constraint::Store, - FirstArg, TempArg)); - Constraints.push_back(Constraint(Constraint::Load, - TempArg, SecondArg)); - // In addition, Dest = Src - Constraints.push_back(Constraint(Constraint::Copy, - FirstArg, SecondArg)); - return true; - } - } - - // Result = Arg0 - if (F->getName() == "realloc" || F->getName() == "strchr" || - F->getName() == "strrchr" || F->getName() == "strstr" || - F->getName() == "strtok") { - const FunctionType *FTy = F->getFunctionType(); - if (FTy->getNumParams() > 0 && - isa<PointerType>(FTy->getParamType(0))) { - Constraints.push_back(Constraint(Constraint::Copy, - getNode(CS.getInstruction()), - getNode(CS.getArgument(0)))); - return true; - } - } - - return false; -} - - - -/// AnalyzeUsesOfFunction - Look at all of the users of the specified function. -/// If this is used by anything complex (i.e., the address escapes), return -/// true. -bool Andersens::AnalyzeUsesOfFunction(Value *V) { - - if (!isa<PointerType>(V->getType())) return true; - - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (isa<LoadInst>(*UI)) { - return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) { - if (V == SI->getOperand(1)) { - return false; - } else if (SI->getOperand(1)) { - return true; // Storing the pointer - } - } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { - if (AnalyzeUsesOfFunction(GEP)) return true; - } else if (isFreeCall(*UI)) { - return false; - } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { - // Make sure that this is just the function being called, not that it is - // passing into the function. - for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) - if (CI->getOperand(i) == V) return true; - } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { - // Make sure that this is just the function being called, not that it is - // passing into the function. - for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i) - if (II->getOperand(i) == V) return true; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { - if (CE->getOpcode() == Instruction::GetElementPtr || - CE->getOpcode() == Instruction::BitCast) { - if (AnalyzeUsesOfFunction(CE)) - return true; - } else { - return true; - } - } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) { - if (!isa<ConstantPointerNull>(ICI->getOperand(1))) - return true; // Allow comparison against null. - } else { - return true; - } - return false; -} - -/// CollectConstraints - This stage scans the program, adding a constraint to -/// the Constraints list for each instruction in the program that induces a -/// constraint, and setting up the initial points-to graph. -/// -void Andersens::CollectConstraints(Module &M) { - // First, the universal set points to itself. - Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet, - UniversalSet)); - Constraints.push_back(Constraint(Constraint::Store, UniversalSet, - UniversalSet)); - - // Next, the null pointer points to the null object. - Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject)); - - // Next, add any constraints on global variables and their initializers. 
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - // Associate the address of the global object as pointing to the memory for - // the global: &G = <G memory> - unsigned ObjectIndex = getObject(I); - Node *Object = &GraphNodes[ObjectIndex]; - Object->setValue(I); - Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I), - ObjectIndex)); - - if (I->hasDefinitiveInitializer()) { - AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer()); - } else { - // If it doesn't have an initializer (i.e. it's defined in another - // translation unit), it points to the universal set. - Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex, - UniversalSet)); - } - } - - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - // Set up the return value node. - if (isa<PointerType>(F->getFunctionType()->getReturnType())) - GraphNodes[getReturnNode(F)].setValue(F); - if (F->getFunctionType()->isVarArg()) - GraphNodes[getVarargNode(F)].setValue(F); - - // Set up incoming argument nodes. - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (isa<PointerType>(I->getType())) - getNodeValue(*I); - - // At some point we should just add constraints for the escaping functions - // at solve time, but this slows down solving. For now, we simply mark - // address taken functions as escaping and treat them as external. - if (!F->hasLocalLinkage() || AnalyzeUsesOfFunction(F)) - AddConstraintsForNonInternalLinkage(F); - - if (!F->isDeclaration()) { - // Scan the function body, creating a memory object for each heap/stack - // allocation in the body of the function and a node to represent all - // pointer values defined by instructions and used as operands. - visit(F); - } else { - // External functions that return pointers return the universal set. - if (isa<PointerType>(F->getFunctionType()->getReturnType())) - Constraints.push_back(Constraint(Constraint::Copy, - getReturnNode(F), - UniversalSet)); - - // Any pointers that are passed into the function have the universal set - // stored into them. - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (isa<PointerType>(I->getType())) { - // Pointers passed into external functions could have anything stored - // through them. - Constraints.push_back(Constraint(Constraint::Store, getNode(I), - UniversalSet)); - // Memory objects passed into external function calls can have the - // universal set point to them. -#if FULL_UNIVERSAL - Constraints.push_back(Constraint(Constraint::Copy, - UniversalSet, - getNode(I))); -#else - Constraints.push_back(Constraint(Constraint::Copy, - getNode(I), - UniversalSet)); -#endif - } - - // If this is an external varargs function, it can also store pointers - // into any pointers passed through the varargs section. - if (F->getFunctionType()->isVarArg()) - Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F), - UniversalSet)); - } - } - NumConstraints += Constraints.size(); -} - - -void Andersens::visitInstruction(Instruction &I) { -#ifdef NDEBUG - return; // This function is just a big assert. -#endif - if (isa<BinaryOperator>(I)) - return; - // Most instructions don't have any effect on pointer values. - switch (I.getOpcode()) { - case Instruction::Br: - case Instruction::Switch: - case Instruction::Unwind: - case Instruction::Unreachable: - case Instruction::ICmp: - case Instruction::FCmp: - return; - default: - // Is this something we aren't handling yet? 
- errs() << "Unknown instruction: " << I; - llvm_unreachable(0); - } -} - -void Andersens::visitAllocaInst(AllocaInst &I) { - visitAlloc(I); -} - -void Andersens::visitAlloc(Instruction &I) { - unsigned ObjectIndex = getObject(&I); - GraphNodes[ObjectIndex].setValue(&I); - Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I), - ObjectIndex)); -} - -void Andersens::visitReturnInst(ReturnInst &RI) { - if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType())) - // return V --> <Copy/retval{F}/v> - Constraints.push_back(Constraint(Constraint::Copy, - getReturnNode(RI.getParent()->getParent()), - getNode(RI.getOperand(0)))); -} - -void Andersens::visitLoadInst(LoadInst &LI) { - if (isa<PointerType>(LI.getType())) - // P1 = load P2 --> <Load/P1/P2> - Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI), - getNode(LI.getOperand(0)))); -} - -void Andersens::visitStoreInst(StoreInst &SI) { - if (isa<PointerType>(SI.getOperand(0)->getType())) - // store P1, P2 --> <Store/P2/P1> - Constraints.push_back(Constraint(Constraint::Store, - getNode(SI.getOperand(1)), - getNode(SI.getOperand(0)))); -} - -void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) { - // P1 = getelementptr P2, ... --> <Copy/P1/P2> - Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP), - getNode(GEP.getOperand(0)))); -} - -void Andersens::visitPHINode(PHINode &PN) { - if (isa<PointerType>(PN.getType())) { - unsigned PNN = getNodeValue(PN); - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) - // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ... - Constraints.push_back(Constraint(Constraint::Copy, PNN, - getNode(PN.getIncomingValue(i)))); - } -} - -void Andersens::visitCastInst(CastInst &CI) { - Value *Op = CI.getOperand(0); - if (isa<PointerType>(CI.getType())) { - if (isa<PointerType>(Op->getType())) { - // P1 = cast P2 --> <Copy/P1/P2> - Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), - getNode(CI.getOperand(0)))); - } else { - // P1 = cast int --> <Copy/P1/Univ> -#if 0 - Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI), - UniversalSet)); -#else - getNodeValue(CI); -#endif - } - } else if (isa<PointerType>(Op->getType())) { - // int = cast P1 --> <Copy/Univ/P1> -#if 0 - Constraints.push_back(Constraint(Constraint::Copy, - UniversalSet, - getNode(CI.getOperand(0)))); -#else - getNode(CI.getOperand(0)); -#endif - } -} - -void Andersens::visitSelectInst(SelectInst &SI) { - if (isa<PointerType>(SI.getType())) { - unsigned SIN = getNodeValue(SI); - // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3> - Constraints.push_back(Constraint(Constraint::Copy, SIN, - getNode(SI.getOperand(1)))); - Constraints.push_back(Constraint(Constraint::Copy, SIN, - getNode(SI.getOperand(2)))); - } -} - -void Andersens::visitVAArg(VAArgInst &I) { - llvm_unreachable("vaarg not handled yet!"); -} - -/// AddConstraintsForCall - Add constraints for a call with actual arguments -/// specified by CS to the function specified by F. Note that the types of -/// arguments might not match up in the case where this is an indirect call and -/// the function pointer has been casted. If this is the case, do something -/// reasonable. -void Andersens::AddConstraintsForCall(CallSite CS, Function *F) { - Value *CallValue = CS.getCalledValue(); - bool IsDeref = F == NULL; - - // If this is a call to an external function, try to handle it directly to get - // some taste of context sensitivity. 
- if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F)) - return; - - if (isa<PointerType>(CS.getType())) { - unsigned CSN = getNode(CS.getInstruction()); - if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) { - if (IsDeref) - Constraints.push_back(Constraint(Constraint::Load, CSN, - getNode(CallValue), CallReturnPos)); - else - Constraints.push_back(Constraint(Constraint::Copy, CSN, - getNode(CallValue) + CallReturnPos)); - } else { - // If the function returns a non-pointer value, handle this just like we - // treat a nonpointer cast to pointer. - Constraints.push_back(Constraint(Constraint::Copy, CSN, - UniversalSet)); - } - } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) { -#if FULL_UNIVERSAL - Constraints.push_back(Constraint(Constraint::Copy, - UniversalSet, - getNode(CallValue) + CallReturnPos)); -#else - Constraints.push_back(Constraint(Constraint::Copy, - getNode(CallValue) + CallReturnPos, - UniversalSet)); -#endif - - - } - - CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end(); - bool external = !F || F->isDeclaration(); - if (F) { - // Direct Call - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI) - { -#if !FULL_UNIVERSAL - if (external && isa<PointerType>((*ArgI)->getType())) - { - // Add constraint that ArgI can now point to anything due to - // escaping, as can everything it points to. The second portion of - // this should be taken care of by universal = *universal - Constraints.push_back(Constraint(Constraint::Copy, - getNode(*ArgI), - UniversalSet)); - } -#endif - if (isa<PointerType>(AI->getType())) { - if (isa<PointerType>((*ArgI)->getType())) { - // Copy the actual argument into the formal argument. - Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), - getNode(*ArgI))); - } else { - Constraints.push_back(Constraint(Constraint::Copy, getNode(AI), - UniversalSet)); - } - } else if (isa<PointerType>((*ArgI)->getType())) { -#if FULL_UNIVERSAL - Constraints.push_back(Constraint(Constraint::Copy, - UniversalSet, - getNode(*ArgI))); -#else - Constraints.push_back(Constraint(Constraint::Copy, - getNode(*ArgI), - UniversalSet)); -#endif - } - } - } else { - //Indirect Call - unsigned ArgPos = CallFirstArgPos; - for (; ArgI != ArgE; ++ArgI) { - if (isa<PointerType>((*ArgI)->getType())) { - // Copy the actual argument into the formal argument. - Constraints.push_back(Constraint(Constraint::Store, - getNode(CallValue), - getNode(*ArgI), ArgPos++)); - } else { - Constraints.push_back(Constraint(Constraint::Store, - getNode (CallValue), - UniversalSet, ArgPos++)); - } - } - } - // Copy all pointers passed through the varargs section to the varargs node. - if (F && F->getFunctionType()->isVarArg()) - for (; ArgI != ArgE; ++ArgI) - if (isa<PointerType>((*ArgI)->getType())) - Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F), - getNode(*ArgI))); - // If more arguments are passed in than we track, just drop them on the floor. 
-} - -void Andersens::visitCallSite(CallSite CS) { - if (isa<PointerType>(CS.getType())) - getNodeValue(*CS.getInstruction()); - - if (Function *F = CS.getCalledFunction()) { - AddConstraintsForCall(CS, F); - } else { - AddConstraintsForCall(CS, NULL); - } -} - -//===----------------------------------------------------------------------===// -// Constraint Solving Phase -//===----------------------------------------------------------------------===// - -/// intersects - Return true if the points-to set of this node intersects -/// with the points-to set of the specified node. -bool Andersens::Node::intersects(Node *N) const { - return PointsTo->intersects(N->PointsTo); -} - -/// intersectsIgnoring - Return true if the points-to set of this node -/// intersects with the points-to set of the specified node on any nodes -/// except for the specified node to ignore. -bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const { - // TODO: If we are only going to call this with the same value for Ignoring, - // we should move the special values out of the points-to bitmap. - bool WeHadIt = PointsTo->test(Ignoring); - bool NHadIt = N->PointsTo->test(Ignoring); - bool Result = false; - if (WeHadIt) - PointsTo->reset(Ignoring); - if (NHadIt) - N->PointsTo->reset(Ignoring); - Result = PointsTo->intersects(N->PointsTo); - if (WeHadIt) - PointsTo->set(Ignoring); - if (NHadIt) - N->PointsTo->set(Ignoring); - return Result; -} - - -/// Clump together address taken variables so that the points-to sets use up -/// less space and can be operated on faster. - -void Andersens::ClumpAddressTaken() { -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-renumber" - std::vector<unsigned> Translate; - std::vector<Node> NewGraphNodes; - - Translate.resize(GraphNodes.size()); - unsigned NewPos = 0; - - for (unsigned i = 0; i < Constraints.size(); ++i) { - Constraint &C = Constraints[i]; - if (C.Type == Constraint::AddressOf) { - GraphNodes[C.Src].AddressTaken = true; - } - } - for (unsigned i = 0; i < NumberSpecialNodes; ++i) { - unsigned Pos = NewPos++; - Translate[i] = Pos; - NewGraphNodes.push_back(GraphNodes[i]); - DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n"); - } - - // I believe this ends up being faster than making two vectors and splicing - // them. 
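The renumbering ClumpAddressTaken performs is easier to see in isolation: special nodes keep their slots, address-taken nodes are packed next so their bits sit close together, everything else follows, and a translate table lets the value/object maps and constraints be rewritten in one pass. A minimal sketch of that idea over a toy flag array rather than the pass's GraphNodes:

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumSpecialNodes = 2;
      // One flag per node: was its address taken?  (Toy data, indices 0-5.)
      std::vector<bool> AddressTaken = {false, false, true, false, true, false};
      std::vector<unsigned> Translate(AddressTaken.size());
      unsigned NewPos = 0;
      // Special nodes keep their positions.
      for (unsigned i = 0; i < NumSpecialNodes; ++i)
        Translate[i] = NewPos++;
      // Then all address-taken nodes, so their points-to bits are contiguous...
      for (unsigned i = NumSpecialNodes; i < AddressTaken.size(); ++i)
        if (AddressTaken[i]) Translate[i] = NewPos++;
      // ...then everything else.
      for (unsigned i = NumSpecialNodes; i < AddressTaken.size(); ++i)
        if (!AddressTaken[i]) Translate[i] = NewPos++;
      for (unsigned i = 0; i < Translate.size(); ++i)
        std::printf("old node %u -> new node %u\n", i, Translate[i]);
      return 0;
    }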
- for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) { - if (GraphNodes[i].AddressTaken) { - unsigned Pos = NewPos++; - Translate[i] = Pos; - NewGraphNodes.push_back(GraphNodes[i]); - DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n"); - } - } - - for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) { - if (!GraphNodes[i].AddressTaken) { - unsigned Pos = NewPos++; - Translate[i] = Pos; - NewGraphNodes.push_back(GraphNodes[i]); - DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n"); - } - } - - for (DenseMap<Value*, unsigned>::iterator Iter = ValueNodes.begin(); - Iter != ValueNodes.end(); - ++Iter) - Iter->second = Translate[Iter->second]; - - for (DenseMap<Value*, unsigned>::iterator Iter = ObjectNodes.begin(); - Iter != ObjectNodes.end(); - ++Iter) - Iter->second = Translate[Iter->second]; - - for (DenseMap<Function*, unsigned>::iterator Iter = ReturnNodes.begin(); - Iter != ReturnNodes.end(); - ++Iter) - Iter->second = Translate[Iter->second]; - - for (DenseMap<Function*, unsigned>::iterator Iter = VarargNodes.begin(); - Iter != VarargNodes.end(); - ++Iter) - Iter->second = Translate[Iter->second]; - - for (unsigned i = 0; i < Constraints.size(); ++i) { - Constraint &C = Constraints[i]; - C.Src = Translate[C.Src]; - C.Dest = Translate[C.Dest]; - } - - GraphNodes.swap(NewGraphNodes); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" -} - -/// The technique used here is described in "Exploiting Pointer and Location -/// Equivalence to Optimize Pointer Analysis. In the 14th International Static -/// Analysis Symposium (SAS), August 2007." It is known as the "HVN" algorithm, -/// and is equivalent to value numbering the collapsed constraint graph without -/// evaluating unions. This is used as a pre-pass to HU in order to resolve -/// first order pointer dereferences and speed up/reduce memory usage of HU. -/// Running both is equivalent to HRU without the iteration -/// HVN in more detail: -/// Imagine the set of constraints was simply straight line code with no loops -/// (we eliminate cycles, so there are no loops), such as: -/// E = &D -/// E = &C -/// E = F -/// F = G -/// G = F -/// Applying value numbering to this code tells us: -/// G == F == E -/// -/// For HVN, this is as far as it goes. We assign new value numbers to every -/// "address node", and every "reference node". -/// To get the optimal result for this, we use a DFS + SCC (since all nodes in a -/// cycle must have the same value number since the = operation is really -/// inclusion, not overwrite), and value number nodes we receive points-to sets -/// before we value our own node. -/// The advantage of HU over HVN is that HU considers the inclusion property, so -/// that if you have -/// E = &D -/// E = &C -/// E = F -/// F = G -/// F = &D -/// G = F -/// HU will determine that G == F == E. HVN will not, because it cannot prove -/// that the points to information ends up being the same because they all -/// receive &D from E anyway. - -void Andersens::HVN() { - DEBUG(dbgs() << "Beginning HVN\n"); - // Build a predecessor graph. This is like our constraint graph with the - // edges going in the opposite direction, and there are edges for all the - // constraints, instead of just copy constraints. We also build implicit - // edges for constraints are implied but not explicit. I.E for the constraint - // a = &b, we add implicit edges *a = b. 
This helps us capture more cycles - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - if (C.Type == Constraint::AddressOf) { - GraphNodes[C.Src].AddressTaken = true; - GraphNodes[C.Src].Direct = false; - - // Dest = &src edge - unsigned AdrNode = C.Src + FirstAdrNode; - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(AdrNode); - - // *Dest = src edge - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].ImplicitPredEdges) - GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; - GraphNodes[RefNode].ImplicitPredEdges->set(C.Src); - } else if (C.Type == Constraint::Load) { - if (C.Offset == 0) { - // dest = *src edge - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode); - } else { - GraphNodes[C.Dest].Direct = false; - } - } else if (C.Type == Constraint::Store) { - if (C.Offset == 0) { - // *dest = src edge - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].PredEdges) - GraphNodes[RefNode].PredEdges = new SparseBitVector<>; - GraphNodes[RefNode].PredEdges->set(C.Src); - } - } else { - // Dest = Src edge and *Dest = *Src edge - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(C.Src); - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].ImplicitPredEdges) - GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; - GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode); - } - } - PEClass = 1; - // Do SCC finding first to condense our predecessor graph - DFSNumber = 0; - Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); - Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); - Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); - - for (unsigned i = 0; i < FirstRefNode; ++i) { - unsigned Node = VSSCCRep[i]; - if (!Node2Visited[Node]) - HVNValNum(Node); - } - for (BitVectorMap::iterator Iter = Set2PEClass.begin(); - Iter != Set2PEClass.end(); - ++Iter) - delete Iter->first; - Set2PEClass.clear(); - Node2DFS.clear(); - Node2Deleted.clear(); - Node2Visited.clear(); - DEBUG(dbgs() << "Finished HVN\n"); - -} - -/// This is the workhorse of HVN value numbering. We combine SCC finding at the -/// same time because it's easy. 
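The HVN description above is easiest to internalize on a tiny acyclic case. The sketch below shows only the labeling rule, that two nodes whose predecessors carry the same set of labels get the same pointer-equivalence label, and leaves out the SCC collapsing and the ref/adr nodes the real pass also handles; it is a flavor of the idea under those simplifications, not the algorithm itself.

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      // A = &x; B = &x; C = A; D = B;   (predecessors listed per node)
      std::map<std::string, std::vector<std::string>> Preds = {
          {"A", {"&x"}}, {"B", {"&x"}}, {"C", {"A"}}, {"D", {"B"}}};
      std::map<std::string, unsigned> Label = {{"&x", 1}};  // address nodes get fresh labels
      std::map<std::set<unsigned>, unsigned> Set2Class;
      unsigned NextClass = 2;
      for (const char *N : {"A", "B", "C", "D"}) {          // topological order
        std::set<unsigned> S;
        for (const std::string &P : Preds[N])
          S.insert(Label[P]);                               // collect predecessor labels
        auto It = Set2Class.find(S);
        Label[N] = (It != Set2Class.end()) ? It->second : (Set2Class[S] = NextClass++);
        std::printf("%s gets label %u\n", N, Label[N]);
      }
      return 0;
    }

Here C and D end up with the same label even though they copy from different variables, which is exactly the kind of redundancy the labels let the later constraint rewriting remove.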
-void Andersens::HVNValNum(unsigned NodeIndex) { - unsigned MyDFS = DFSNumber++; - Node *N = &GraphNodes[NodeIndex]; - Node2Visited[NodeIndex] = true; - Node2DFS[NodeIndex] = MyDFS; - - // First process all our explicit edges - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Deleted[j]) { - if (!Node2Visited[j]) - HVNValNum(j); - if (Node2DFS[NodeIndex] > Node2DFS[j]) - Node2DFS[NodeIndex] = Node2DFS[j]; - } - } - - // Now process all the implicit edges - if (N->ImplicitPredEdges) - for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin(); - Iter != N->ImplicitPredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Deleted[j]) { - if (!Node2Visited[j]) - HVNValNum(j); - if (Node2DFS[NodeIndex] > Node2DFS[j]) - Node2DFS[NodeIndex] = Node2DFS[j]; - } - } - - // See if we found any cycles - if (MyDFS == Node2DFS[NodeIndex]) { - while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) { - unsigned CycleNodeIndex = SCCStack.top(); - Node *CycleNode = &GraphNodes[CycleNodeIndex]; - VSSCCRep[CycleNodeIndex] = NodeIndex; - // Unify the nodes - N->Direct &= CycleNode->Direct; - - if (CycleNode->PredEdges) { - if (!N->PredEdges) - N->PredEdges = new SparseBitVector<>; - *(N->PredEdges) |= CycleNode->PredEdges; - delete CycleNode->PredEdges; - CycleNode->PredEdges = NULL; - } - if (CycleNode->ImplicitPredEdges) { - if (!N->ImplicitPredEdges) - N->ImplicitPredEdges = new SparseBitVector<>; - *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges; - delete CycleNode->ImplicitPredEdges; - CycleNode->ImplicitPredEdges = NULL; - } - - SCCStack.pop(); - } - - Node2Deleted[NodeIndex] = true; - - if (!N->Direct) { - GraphNodes[NodeIndex].PointerEquivLabel = PEClass++; - return; - } - - // Collect labels of successor nodes - bool AllSame = true; - unsigned First = ~0; - SparseBitVector<> *Labels = new SparseBitVector<>; - bool Used = false; - - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - unsigned Label = GraphNodes[j].PointerEquivLabel; - // Ignore labels that are equal to us or non-pointers - if (j == NodeIndex || Label == 0) - continue; - if (First == (unsigned)~0) - First = Label; - else if (First != Label) - AllSame = false; - Labels->set(Label); - } - - // We either have a non-pointer, a copy of an existing node, or a new node. - // Assign the appropriate pointer equivalence label. - if (Labels->empty()) { - GraphNodes[NodeIndex].PointerEquivLabel = 0; - } else if (AllSame) { - GraphNodes[NodeIndex].PointerEquivLabel = First; - } else { - GraphNodes[NodeIndex].PointerEquivLabel = Set2PEClass[Labels]; - if (GraphNodes[NodeIndex].PointerEquivLabel == 0) { - unsigned EquivClass = PEClass++; - Set2PEClass[Labels] = EquivClass; - GraphNodes[NodeIndex].PointerEquivLabel = EquivClass; - Used = true; - } - } - if (!Used) - delete Labels; - } else { - SCCStack.push(NodeIndex); - } -} - -/// The technique used here is described in "Exploiting Pointer and Location -/// Equivalence to Optimize Pointer Analysis. In the 14th International Static -/// Analysis Symposium (SAS), August 2007." It is known as the "HU" algorithm, -/// and is equivalent to value numbering the collapsed constraint graph -/// including evaluating unions. -void Andersens::HU() { - DEBUG(dbgs() << "Beginning HU\n"); - // Build a predecessor graph. 
This is like our constraint graph with the - // edges going in the opposite direction, and there are edges for all the - // constraints, instead of just copy constraints. We also build implicit - // edges for constraints are implied but not explicit. I.E for the constraint - // a = &b, we add implicit edges *a = b. This helps us capture more cycles - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - if (C.Type == Constraint::AddressOf) { - GraphNodes[C.Src].AddressTaken = true; - GraphNodes[C.Src].Direct = false; - - GraphNodes[C.Dest].PointsTo->set(C.Src); - // *Dest = src edge - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].ImplicitPredEdges) - GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; - GraphNodes[RefNode].ImplicitPredEdges->set(C.Src); - GraphNodes[C.Src].PointedToBy->set(C.Dest); - } else if (C.Type == Constraint::Load) { - if (C.Offset == 0) { - // dest = *src edge - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode); - } else { - GraphNodes[C.Dest].Direct = false; - } - } else if (C.Type == Constraint::Store) { - if (C.Offset == 0) { - // *dest = src edge - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].PredEdges) - GraphNodes[RefNode].PredEdges = new SparseBitVector<>; - GraphNodes[RefNode].PredEdges->set(C.Src); - } - } else { - // Dest = Src edge and *Dest = *Src edg - if (!GraphNodes[C.Dest].PredEdges) - GraphNodes[C.Dest].PredEdges = new SparseBitVector<>; - GraphNodes[C.Dest].PredEdges->set(C.Src); - unsigned RefNode = C.Dest + FirstRefNode; - if (!GraphNodes[RefNode].ImplicitPredEdges) - GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>; - GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode); - } - } - PEClass = 1; - // Do SCC finding first to condense our predecessor graph - DFSNumber = 0; - Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); - Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); - Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); - - for (unsigned i = 0; i < FirstRefNode; ++i) { - if (FindNode(i) == i) { - unsigned Node = VSSCCRep[i]; - if (!Node2Visited[Node]) - Condense(Node); - } - } - - // Reset tables for actual labeling - Node2DFS.clear(); - Node2Visited.clear(); - Node2Deleted.clear(); - // Pre-grow our densemap so that we don't get really bad behavior - Set2PEClass.resize(GraphNodes.size()); - - // Visit the condensed graph and generate pointer equivalence labels. - Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); - for (unsigned i = 0; i < FirstRefNode; ++i) { - if (FindNode(i) == i) { - unsigned Node = VSSCCRep[i]; - if (!Node2Visited[Node]) - HUValNum(Node); - } - } - // PEClass nodes will be deleted by the deleting of N->PointsTo in our caller. - Set2PEClass.clear(); - DEBUG(dbgs() << "Finished HU\n"); -} - - -/// Implementation of standard Tarjan SCC algorithm as modified by Nuutilla. 
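Condense below is a lowlink-based SCC computation with the pass's own bookkeeping folded in. Stripped of the points-to and edge merging, the control flow is roughly this minimal recursive Tarjan-style sketch; the pass follows Nuutila's formulation, which differs in details such as how deleted nodes are tracked, so treat this only as an outline.

    #include <algorithm>
    #include <cstdio>
    #include <stack>
    #include <vector>

    static std::vector<std::vector<unsigned>> Succ = {{1}, {2}, {0}, {1}};  // toy digraph
    static std::vector<unsigned> DFS, Rep;
    static std::vector<bool> OnStack;
    static std::stack<unsigned> Stack;
    static unsigned Counter = 0;

    // Returns the lowlink of N; nodes of a finished SCC get Rep[] set to its root.
    static unsigned visit(unsigned N) {
      unsigned My = DFS[N] = ++Counter;
      Stack.push(N);
      OnStack[N] = true;
      unsigned Low = My;
      for (unsigned M : Succ[N]) {
        if (DFS[M] == 0)
          Low = std::min(Low, visit(M));
        else if (OnStack[M])
          Low = std::min(Low, DFS[M]);
      }
      if (Low == My) {                       // N is the root of its SCC
        unsigned P;
        do {
          P = Stack.top(); Stack.pop();
          OnStack[P] = false;
          Rep[P] = N;                        // unify the cycle members under N
        } while (P != N);
      }
      return Low;
    }

    int main() {
      DFS.assign(Succ.size(), 0);
      Rep.assign(Succ.size(), 0);
      OnStack.assign(Succ.size(), false);
      for (unsigned i = 0; i < Succ.size(); ++i)
        if (DFS[i] == 0) visit(i);
      for (unsigned i = 0; i < Succ.size(); ++i)
        std::printf("node %u -> rep %u\n", i, Rep[i]);   // 0,1,2 collapse; 3 stays alone
      return 0;
    }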
-void Andersens::Condense(unsigned NodeIndex) { - unsigned MyDFS = DFSNumber++; - Node *N = &GraphNodes[NodeIndex]; - Node2Visited[NodeIndex] = true; - Node2DFS[NodeIndex] = MyDFS; - - // First process all our explicit edges - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Deleted[j]) { - if (!Node2Visited[j]) - Condense(j); - if (Node2DFS[NodeIndex] > Node2DFS[j]) - Node2DFS[NodeIndex] = Node2DFS[j]; - } - } - - // Now process all the implicit edges - if (N->ImplicitPredEdges) - for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin(); - Iter != N->ImplicitPredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Deleted[j]) { - if (!Node2Visited[j]) - Condense(j); - if (Node2DFS[NodeIndex] > Node2DFS[j]) - Node2DFS[NodeIndex] = Node2DFS[j]; - } - } - - // See if we found any cycles - if (MyDFS == Node2DFS[NodeIndex]) { - while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) { - unsigned CycleNodeIndex = SCCStack.top(); - Node *CycleNode = &GraphNodes[CycleNodeIndex]; - VSSCCRep[CycleNodeIndex] = NodeIndex; - // Unify the nodes - N->Direct &= CycleNode->Direct; - - *(N->PointsTo) |= CycleNode->PointsTo; - delete CycleNode->PointsTo; - CycleNode->PointsTo = NULL; - if (CycleNode->PredEdges) { - if (!N->PredEdges) - N->PredEdges = new SparseBitVector<>; - *(N->PredEdges) |= CycleNode->PredEdges; - delete CycleNode->PredEdges; - CycleNode->PredEdges = NULL; - } - if (CycleNode->ImplicitPredEdges) { - if (!N->ImplicitPredEdges) - N->ImplicitPredEdges = new SparseBitVector<>; - *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges; - delete CycleNode->ImplicitPredEdges; - CycleNode->ImplicitPredEdges = NULL; - } - SCCStack.pop(); - } - - Node2Deleted[NodeIndex] = true; - - // Set up number of incoming edges for other nodes - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) - ++GraphNodes[VSSCCRep[*Iter]].NumInEdges; - } else { - SCCStack.push(NodeIndex); - } -} - -void Andersens::HUValNum(unsigned NodeIndex) { - Node *N = &GraphNodes[NodeIndex]; - Node2Visited[NodeIndex] = true; - - // Eliminate dereferences of non-pointers for those non-pointers we have - // already identified. These are ref nodes whose non-ref node: - // 1. Has already been visited determined to point to nothing (and thus, a - // dereference of it must point to nothing) - // 2. Any direct node with no predecessor edges in our graph and with no - // points-to set (since it can't point to anything either, being that it - // receives no points-to sets and has none). - if (NodeIndex >= FirstRefNode) { - unsigned j = VSSCCRep[FindNode(NodeIndex - FirstRefNode)]; - if ((Node2Visited[j] && !GraphNodes[j].PointerEquivLabel) - || (GraphNodes[j].Direct && !GraphNodes[j].PredEdges - && GraphNodes[j].PointsTo->empty())){ - return; - } - } - // Process all our explicit edges - if (N->PredEdges) - for (SparseBitVector<>::iterator Iter = N->PredEdges->begin(); - Iter != N->PredEdges->end(); - ++Iter) { - unsigned j = VSSCCRep[*Iter]; - if (!Node2Visited[j]) - HUValNum(j); - - // If this edge turned out to be the same as us, or got no pointer - // equivalence label (and thus points to nothing) , just decrement our - // incoming edges and continue. 
- if (j == NodeIndex || GraphNodes[j].PointerEquivLabel == 0) { - --GraphNodes[j].NumInEdges; - continue; - } - - *(N->PointsTo) |= GraphNodes[j].PointsTo; - - // If we didn't end up storing this in the hash, and we're done with all - // the edges, we don't need the points-to set anymore. - --GraphNodes[j].NumInEdges; - if (!GraphNodes[j].NumInEdges && !GraphNodes[j].StoredInHash) { - delete GraphNodes[j].PointsTo; - GraphNodes[j].PointsTo = NULL; - } - } - // If this isn't a direct node, generate a fresh variable. - if (!N->Direct) { - N->PointsTo->set(FirstRefNode + NodeIndex); - } - - // See If we have something equivalent to us, if not, generate a new - // equivalence class. - if (N->PointsTo->empty()) { - delete N->PointsTo; - N->PointsTo = NULL; - } else { - if (N->Direct) { - N->PointerEquivLabel = Set2PEClass[N->PointsTo]; - if (N->PointerEquivLabel == 0) { - unsigned EquivClass = PEClass++; - N->StoredInHash = true; - Set2PEClass[N->PointsTo] = EquivClass; - N->PointerEquivLabel = EquivClass; - } - } else { - N->PointerEquivLabel = PEClass++; - } - } -} - -/// Rewrite our list of constraints so that pointer equivalent nodes are -/// replaced by their the pointer equivalence class representative. -void Andersens::RewriteConstraints() { - std::vector<Constraint> NewConstraints; - DenseSet<Constraint, ConstraintKeyInfo> Seen; - - PEClass2Node.clear(); - PENLEClass2Node.clear(); - - // We may have from 1 to Graphnodes + 1 equivalence classes. - PEClass2Node.insert(PEClass2Node.begin(), GraphNodes.size() + 1, -1); - PENLEClass2Node.insert(PENLEClass2Node.begin(), GraphNodes.size() + 1, -1); - - // Rewrite constraints, ignoring non-pointer constraints, uniting equivalent - // nodes, and rewriting constraints to use the representative nodes. - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - unsigned RHSNode = FindNode(C.Src); - unsigned LHSNode = FindNode(C.Dest); - unsigned RHSLabel = GraphNodes[VSSCCRep[RHSNode]].PointerEquivLabel; - unsigned LHSLabel = GraphNodes[VSSCCRep[LHSNode]].PointerEquivLabel; - - // First we try to eliminate constraints for things we can prove don't point - // to anything. - if (LHSLabel == 0) { - DEBUG(PrintNode(&GraphNodes[LHSNode])); - DEBUG(dbgs() << " is a non-pointer, ignoring constraint.\n"); - continue; - } - if (RHSLabel == 0) { - DEBUG(PrintNode(&GraphNodes[RHSNode])); - DEBUG(dbgs() << " is a non-pointer, ignoring constraint.\n"); - continue; - } - // This constraint may be useless, and it may become useless as we translate - // it. - if (C.Src == C.Dest && C.Type == Constraint::Copy) - continue; - - C.Src = FindEquivalentNode(RHSNode, RHSLabel); - C.Dest = FindEquivalentNode(FindNode(LHSNode), LHSLabel); - if ((C.Src == C.Dest && C.Type == Constraint::Copy) - || Seen.count(C)) - continue; - - Seen.insert(C); - NewConstraints.push_back(C); - } - Constraints.swap(NewConstraints); - PEClass2Node.clear(); -} - -/// See if we have a node that is pointer equivalent to the one being asked -/// about, and if so, unite them and return the equivalent node. Otherwise, -/// return the original node. -unsigned Andersens::FindEquivalentNode(unsigned NodeIndex, - unsigned NodeLabel) { - if (!GraphNodes[NodeIndex].AddressTaken) { - if (PEClass2Node[NodeLabel] != -1) { - // We found an existing node with the same pointer label, so unify them. - // We specifically request that Union-By-Rank not be used so that - // PEClass2Node[NodeLabel] U= NodeIndex and not the other way around. 
- return UniteNodes(PEClass2Node[NodeLabel], NodeIndex, false); - } else { - PEClass2Node[NodeLabel] = NodeIndex; - PENLEClass2Node[NodeLabel] = NodeIndex; - } - } else if (PENLEClass2Node[NodeLabel] == -1) { - PENLEClass2Node[NodeLabel] = NodeIndex; - } - - return NodeIndex; -} - -void Andersens::PrintLabels() const { - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - if (i < FirstRefNode) { - PrintNode(&GraphNodes[i]); - } else if (i < FirstAdrNode) { - DEBUG(dbgs() << "REF("); - PrintNode(&GraphNodes[i-FirstRefNode]); - DEBUG(dbgs() <<")"); - } else { - DEBUG(dbgs() << "ADR("); - PrintNode(&GraphNodes[i-FirstAdrNode]); - DEBUG(dbgs() <<")"); - } - - DEBUG(dbgs() << " has pointer label " << GraphNodes[i].PointerEquivLabel - << " and SCC rep " << VSSCCRep[i] - << " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct") - << "\n"); - } -} - -/// The technique used here is described in "The Ant and the -/// Grasshopper: Fast and Accurate Pointer Analysis for Millions of -/// Lines of Code. In Programming Language Design and Implementation -/// (PLDI), June 2007." It is known as the "HCD" (Hybrid Cycle -/// Detection) algorithm. It is called a hybrid because it performs an -/// offline analysis and uses its results during the solving (online) -/// phase. This is just the offline portion; the results of this -/// operation are stored in SDT and are later used in SolveContraints() -/// and UniteNodes(). -void Andersens::HCD() { - DEBUG(dbgs() << "Starting HCD.\n"); - HCDSCCRep.resize(GraphNodes.size()); - - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - GraphNodes[i].Edges = new SparseBitVector<>; - HCDSCCRep[i] = i; - } - - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size()); - if (C.Type == Constraint::AddressOf) { - continue; - } else if (C.Type == Constraint::Load) { - if( C.Offset == 0 ) - GraphNodes[C.Dest].Edges->set(C.Src + FirstRefNode); - } else if (C.Type == Constraint::Store) { - if( C.Offset == 0 ) - GraphNodes[C.Dest + FirstRefNode].Edges->set(C.Src); - } else { - GraphNodes[C.Dest].Edges->set(C.Src); - } - } - - Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); - Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); - Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false); - SDT.insert(SDT.begin(), GraphNodes.size() / 2, -1); - - DFSNumber = 0; - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - unsigned Node = HCDSCCRep[i]; - if (!Node2Deleted[Node]) - Search(Node); - } - - for (unsigned i = 0; i < GraphNodes.size(); ++i) - if (GraphNodes[i].Edges != NULL) { - delete GraphNodes[i].Edges; - GraphNodes[i].Edges = NULL; - } - - while( !SCCStack.empty() ) - SCCStack.pop(); - - Node2DFS.clear(); - Node2Visited.clear(); - Node2Deleted.clear(); - HCDSCCRep.clear(); - DEBUG(dbgs() << "HCD complete.\n"); -} - -// Component of HCD: -// Use Nuutila's variant of Tarjan's algorithm to detect -// Strongly-Connected Components (SCCs). For non-trivial SCCs -// containing ref nodes, insert the appropriate information in SDT. 
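The offline HCD step only needs a crude graph good enough to find cycles that mix a ref node (standing for "*x") with ordinary nodes. Below is a standalone sketch of how such a graph can be built from a couple of constraints, using node i+N to stand for "*i"; the edge direction convention and the containers are simplifications for illustration, not the pass's representation.

    #include <cstdio>
    #include <utility>
    #include <vector>

    enum Kind { AddressOf, Copy, Load, Store };
    struct Constraint { Kind K; unsigned Dest, Src; };

    int main() {
      const unsigned N = 3;                          // variables 0..2; ref node for i is i+N
      std::vector<Constraint> Cs = {{Load, 1, 0},    // x1 = *x0
                                    {Store, 0, 1}};  // *x0 = x1
      std::vector<std::pair<unsigned, unsigned>> Edges;   // (to, from), direction of value flow
      for (const Constraint &C : Cs) {
        if (C.K == Copy)       Edges.push_back({C.Dest, C.Src});
        else if (C.K == Load)  Edges.push_back({C.Dest, C.Src + N});   // x1 <- *x0
        else if (C.K == Store) Edges.push_back({C.Dest + N, C.Src});   // *x0 <- x1
      }
      for (const auto &E : Edges)
        std::printf("edge %u <- %u\n", E.first, E.second);
      // Nodes 1 and 3 (i.e. x1 and *x0) form a cycle containing a ref node, so the
      // offline pass would record that everything x0 points to can be united with x1.
      return 0;
    }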
-void Andersens::Search(unsigned Node) { - unsigned MyDFS = DFSNumber++; - - Node2Visited[Node] = true; - Node2DFS[Node] = MyDFS; - - for (SparseBitVector<>::iterator Iter = GraphNodes[Node].Edges->begin(), - End = GraphNodes[Node].Edges->end(); - Iter != End; - ++Iter) { - unsigned J = HCDSCCRep[*Iter]; - assert(GraphNodes[J].isRep() && "Debug check; must be representative"); - if (!Node2Deleted[J]) { - if (!Node2Visited[J]) - Search(J); - if (Node2DFS[Node] > Node2DFS[J]) - Node2DFS[Node] = Node2DFS[J]; - } - } - - if( MyDFS != Node2DFS[Node] ) { - SCCStack.push(Node); - return; - } - - // This node is the root of a SCC, so process it. - // - // If the SCC is "non-trivial" (not a singleton) and contains a reference - // node, we place this SCC into SDT. We unite the nodes in any case. - if (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) { - SparseBitVector<> SCC; - - SCC.set(Node); - - bool Ref = (Node >= FirstRefNode); - - Node2Deleted[Node] = true; - - do { - unsigned P = SCCStack.top(); SCCStack.pop(); - Ref |= (P >= FirstRefNode); - SCC.set(P); - HCDSCCRep[P] = Node; - } while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS); - - if (Ref) { - unsigned Rep = SCC.find_first(); - assert(Rep < FirstRefNode && "The SCC didn't have a non-Ref node!"); - - SparseBitVector<>::iterator i = SCC.begin(); - - // Skip over the non-ref nodes - while( *i < FirstRefNode ) - ++i; - - while( i != SCC.end() ) - SDT[ (*i++) - FirstRefNode ] = Rep; - } - } -} - - -/// Optimize the constraints by performing offline variable substitution and -/// other optimizations. -void Andersens::OptimizeConstraints() { - DEBUG(dbgs() << "Beginning constraint optimization\n"); - - SDTActive = false; - - // Function related nodes need to stay in the same relative position and can't - // be location equivalent. - for (std::map<unsigned, unsigned>::iterator Iter = MaxK.begin(); - Iter != MaxK.end(); - ++Iter) { - for (unsigned i = Iter->first; - i != Iter->first + Iter->second; - ++i) { - GraphNodes[i].AddressTaken = true; - GraphNodes[i].Direct = false; - } - } - - ClumpAddressTaken(); - FirstRefNode = GraphNodes.size(); - FirstAdrNode = FirstRefNode + GraphNodes.size(); - GraphNodes.insert(GraphNodes.end(), 2 * GraphNodes.size(), - Node(false)); - VSSCCRep.resize(GraphNodes.size()); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - VSSCCRep[i] = i; - } - HVN(); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *N = &GraphNodes[i]; - delete N->PredEdges; - N->PredEdges = NULL; - delete N->ImplicitPredEdges; - N->ImplicitPredEdges = NULL; - } -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-labels" - DEBUG(PrintLabels()); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" - RewriteConstraints(); - // Delete the adr nodes. 
- GraphNodes.resize(FirstRefNode * 2); - - // Now perform HU - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *N = &GraphNodes[i]; - if (FindNode(i) == i) { - N->PointsTo = new SparseBitVector<>; - N->PointedToBy = new SparseBitVector<>; - // Reset our labels - } - VSSCCRep[i] = i; - N->PointerEquivLabel = 0; - } - HU(); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-labels" - DEBUG(PrintLabels()); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" - RewriteConstraints(); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - if (FindNode(i) == i) { - Node *N = &GraphNodes[i]; - delete N->PointsTo; - N->PointsTo = NULL; - delete N->PredEdges; - N->PredEdges = NULL; - delete N->ImplicitPredEdges; - N->ImplicitPredEdges = NULL; - delete N->PointedToBy; - N->PointedToBy = NULL; - } - } - - // perform Hybrid Cycle Detection (HCD) - HCD(); - SDTActive = true; - - // No longer any need for the upper half of GraphNodes (for ref nodes). - GraphNodes.erase(GraphNodes.begin() + FirstRefNode, GraphNodes.end()); - - // HCD complete. - - DEBUG(dbgs() << "Finished constraint optimization\n"); - FirstRefNode = 0; - FirstAdrNode = 0; -} - -/// Unite pointer but not location equivalent variables, now that the constraint -/// graph is built. -void Andersens::UnitePointerEquivalences() { - DEBUG(dbgs() << "Uniting remaining pointer equivalences\n"); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) { - unsigned Label = GraphNodes[i].PointerEquivLabel; - - if (Label && PENLEClass2Node[Label] != -1) - UniteNodes(i, PENLEClass2Node[Label]); - } - } - DEBUG(dbgs() << "Finished remaining pointer equivalences\n"); - PENLEClass2Node.clear(); -} - -/// Create the constraint graph used for solving points-to analysis. -/// -void Andersens::CreateConstraintGraph() { - for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { - Constraint &C = Constraints[i]; - assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size()); - if (C.Type == Constraint::AddressOf) - GraphNodes[C.Dest].PointsTo->set(C.Src); - else if (C.Type == Constraint::Load) - GraphNodes[C.Src].Constraints.push_back(C); - else if (C.Type == Constraint::Store) - GraphNodes[C.Dest].Constraints.push_back(C); - else if (C.Offset != 0) - GraphNodes[C.Src].Constraints.push_back(C); - else - GraphNodes[C.Src].Edges->set(C.Dest); - } -} - -// Perform DFS and cycle detection. -bool Andersens::QueryNode(unsigned Node) { - assert(GraphNodes[Node].isRep() && "Querying a non-rep node"); - unsigned OurDFS = ++DFSNumber; - SparseBitVector<> ToErase; - SparseBitVector<> NewEdges; - Tarjan2DFS[Node] = OurDFS; - - // Changed denotes a change from a recursive call that we will bubble up. - // Merged is set if we actually merge a node ourselves. - bool Changed = false, Merged = false; - - for (SparseBitVector<>::iterator bi = GraphNodes[Node].Edges->begin(); - bi != GraphNodes[Node].Edges->end(); - ++bi) { - unsigned RepNode = FindNode(*bi); - // If this edge points to a non-representative node but we are - // already planning to add an edge to its representative, we have no - // need for this edge anymore. - if (RepNode != *bi && NewEdges.test(RepNode)){ - ToErase.set(*bi); - continue; - } - - // Continue about our DFS. 
- if (!Tarjan2Deleted[RepNode]){ - if (Tarjan2DFS[RepNode] == 0) { - Changed |= QueryNode(RepNode); - // May have been changed by QueryNode - RepNode = FindNode(RepNode); - } - if (Tarjan2DFS[RepNode] < Tarjan2DFS[Node]) - Tarjan2DFS[Node] = Tarjan2DFS[RepNode]; - } - - // We may have just discovered that this node is part of a cycle, in - // which case we can also erase it. - if (RepNode != *bi) { - ToErase.set(*bi); - NewEdges.set(RepNode); - } - } - - GraphNodes[Node].Edges->intersectWithComplement(ToErase); - GraphNodes[Node].Edges |= NewEdges; - - // If this node is a root of a non-trivial SCC, place it on our - // worklist to be processed. - if (OurDFS == Tarjan2DFS[Node]) { - while (!SCCStack.empty() && Tarjan2DFS[SCCStack.top()] >= OurDFS) { - Node = UniteNodes(Node, SCCStack.top()); - - SCCStack.pop(); - Merged = true; - } - Tarjan2Deleted[Node] = true; - - if (Merged) - NextWL->insert(&GraphNodes[Node]); - } else { - SCCStack.push(Node); - } - - return(Changed | Merged); -} - -/// SolveConstraints - This stage iteratively processes the constraints list -/// propagating constraints (adding edges to the Nodes in the points-to graph) -/// until a fixed point is reached. -/// -/// We use a variant of the technique called "Lazy Cycle Detection", which is -/// described in "The Ant and the Grasshopper: Fast and Accurate Pointer -/// Analysis for Millions of Lines of Code. In Programming Language Design and -/// Implementation (PLDI), June 2007." -/// The paper describes performing cycle detection one node at a time, which can -/// be expensive if there are no cycles, but there are long chains of nodes that -/// it heuristically believes are cycles (because it will DFS from each node -/// without state from previous nodes). -/// Instead, we use the heuristic to build a worklist of nodes to check, then -/// cycle detect them all at the same time to do this more cheaply. This -/// catches cycles slightly later than the original technique did, but does it -/// make significantly cheaper. - -void Andersens::SolveConstraints() { - CurrWL = &w1; - NextWL = &w2; - - OptimizeConstraints(); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa-constraints" - DEBUG(PrintConstraints()); -#undef DEBUG_TYPE -#define DEBUG_TYPE "anders-aa" - - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *N = &GraphNodes[i]; - N->PointsTo = new SparseBitVector<>; - N->OldPointsTo = new SparseBitVector<>; - N->Edges = new SparseBitVector<>; - } - CreateConstraintGraph(); - UnitePointerEquivalences(); - assert(SCCStack.empty() && "SCC Stack should be empty by now!"); - Node2DFS.clear(); - Node2Deleted.clear(); - Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0); - Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false); - DFSNumber = 0; - DenseSet<Constraint, ConstraintKeyInfo> Seen; - DenseSet<std::pair<unsigned,unsigned>, PairKeyInfo> EdgesChecked; - - // Order graph and add initial nodes to work list. - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *INode = &GraphNodes[i]; - - // Add to work list if it's a representative and can contribute to the - // calculation right now. 
- if (INode->isRep() && !INode->PointsTo->empty() - && (!INode->Edges->empty() || !INode->Constraints.empty())) { - INode->Stamp(); - CurrWL->insert(INode); - } - } - std::queue<unsigned int> TarjanWL; -#if !FULL_UNIVERSAL - // "Rep and special variables" - in order for HCD to maintain conservative - // results when !FULL_UNIVERSAL, we need to treat the special variables in - // the same way that the !FULL_UNIVERSAL tweak does throughout the rest of - // the analysis - it's ok to add edges from the special nodes, but never - // *to* the special nodes. - std::vector<unsigned int> RSV; -#endif - while( !CurrWL->empty() ) { - DEBUG(dbgs() << "Starting iteration #" << ++NumIters << "\n"); - - Node* CurrNode; - unsigned CurrNodeIndex; - - // Actual cycle checking code. We cycle check all of the lazy cycle - // candidates from the last iteration in one go. - if (!TarjanWL.empty()) { - DFSNumber = 0; - - Tarjan2DFS.clear(); - Tarjan2Deleted.clear(); - while (!TarjanWL.empty()) { - unsigned int ToTarjan = TarjanWL.front(); - TarjanWL.pop(); - if (!Tarjan2Deleted[ToTarjan] - && GraphNodes[ToTarjan].isRep() - && Tarjan2DFS[ToTarjan] == 0) - QueryNode(ToTarjan); - } - } - - // Add to work list if it's a representative and can contribute to the - // calculation right now. - while( (CurrNode = CurrWL->pop()) != NULL ) { - CurrNodeIndex = CurrNode - &GraphNodes[0]; - CurrNode->Stamp(); - - - // Figure out the changed points to bits - SparseBitVector<> CurrPointsTo; - CurrPointsTo.intersectWithComplement(CurrNode->PointsTo, - CurrNode->OldPointsTo); - if (CurrPointsTo.empty()) - continue; - - *(CurrNode->OldPointsTo) |= CurrPointsTo; - - // Check the offline-computed equivalencies from HCD. - bool SCC = false; - unsigned Rep; - - if (SDT[CurrNodeIndex] >= 0) { - SCC = true; - Rep = FindNode(SDT[CurrNodeIndex]); - -#if !FULL_UNIVERSAL - RSV.clear(); -#endif - for (SparseBitVector<>::iterator bi = CurrPointsTo.begin(); - bi != CurrPointsTo.end(); ++bi) { - unsigned Node = FindNode(*bi); -#if !FULL_UNIVERSAL - if (Node < NumberSpecialNodes) { - RSV.push_back(Node); - continue; - } -#endif - Rep = UniteNodes(Rep,Node); - } -#if !FULL_UNIVERSAL - RSV.push_back(Rep); -#endif - - NextWL->insert(&GraphNodes[Rep]); - - if ( ! CurrNode->isRep() ) - continue; - } - - Seen.clear(); - - /* Now process the constraints for this node. */ - for (std::list<Constraint>::iterator li = CurrNode->Constraints.begin(); - li != CurrNode->Constraints.end(); ) { - li->Src = FindNode(li->Src); - li->Dest = FindNode(li->Dest); - - // Delete redundant constraints - if( Seen.count(*li) ) { - std::list<Constraint>::iterator lk = li; li++; - - CurrNode->Constraints.erase(lk); - ++NumErased; - continue; - } - Seen.insert(*li); - - // Src and Dest will be the vars we are going to process. - // This may look a bit ugly, but what it does is allow us to process - // both store and load constraints with the same code. - // Load constraints say that every member of our RHS solution has K - // added to it, and that variable gets an edge to LHS. We also union - // RHS+K's solution into the LHS solution. - // Store constraints say that every member of our LHS solution has K - // added to it, and that variable gets an edge from RHS. We also union - // RHS's solution into the LHS+K solution. 
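The load/store handling described in the comment above reduces, for each newly discovered pointee, to adding an ordinary copy edge so that future propagation is a plain bitmap union. A minimal sketch of that expansion for a load constraint, with std::set standing in for SparseBitVector:

    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    int main() {
      // Constraint:  dest = *src + K.  Delta holds the pointees of src discovered
      // since this node was last processed (CurrPointsTo in the pass).
      std::set<unsigned> Delta = {4, 7};
      unsigned Dest = 2, K = 0;
      std::vector<std::pair<unsigned, unsigned>> NewEdges;   // (from, to)
      for (unsigned M : Delta)
        NewEdges.push_back({M + K, Dest});    // copy edge: Dest now also copies from M+K
      for (const auto &E : NewEdges)
        std::printf("new copy edge: node %u -> node %u\n", E.first, E.second);
      // A store constraint  *dest = src  is symmetric: each new pointee of dest
      // gains an incoming edge from src instead.
      return 0;
    }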
- unsigned *Src; - unsigned *Dest; - unsigned K = li->Offset; - unsigned CurrMember; - if (li->Type == Constraint::Load) { - Src = &CurrMember; - Dest = &li->Dest; - } else if (li->Type == Constraint::Store) { - Src = &li->Src; - Dest = &CurrMember; - } else { - // TODO Handle offseted copy constraint - li++; - continue; - } - - // See if we can use Hybrid Cycle Detection (that is, check - // if it was a statically detected offline equivalence that - // involves pointers; if so, remove the redundant constraints). - if( SCC && K == 0 ) { -#if FULL_UNIVERSAL - CurrMember = Rep; - - if (GraphNodes[*Src].Edges->test_and_set(*Dest)) - if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo)) - NextWL->insert(&GraphNodes[*Dest]); -#else - for (unsigned i=0; i < RSV.size(); ++i) { - CurrMember = RSV[i]; - - if (*Dest < NumberSpecialNodes) - continue; - if (GraphNodes[*Src].Edges->test_and_set(*Dest)) - if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo)) - NextWL->insert(&GraphNodes[*Dest]); - } -#endif - // since all future elements of the points-to set will be - // equivalent to the current ones, the complex constraints - // become redundant. - // - std::list<Constraint>::iterator lk = li; li++; -#if !FULL_UNIVERSAL - // In this case, we can still erase the constraints when the - // elements of the points-to sets are referenced by *Dest, - // but not when they are referenced by *Src (i.e. for a Load - // constraint). This is because if another special variable is - // put into the points-to set later, we still need to add the - // new edge from that special variable. - if( lk->Type != Constraint::Load) -#endif - GraphNodes[CurrNodeIndex].Constraints.erase(lk); - } else { - const SparseBitVector<> &Solution = CurrPointsTo; - - for (SparseBitVector<>::iterator bi = Solution.begin(); - bi != Solution.end(); - ++bi) { - CurrMember = *bi; - - // Need to increment the member by K since that is where we are - // supposed to copy to/from. Note that in positive weight cycles, - // which occur in address taking of fields, K can go past - // MaxK[CurrMember] elements, even though that is all it could point - // to. - if (K > 0 && K > MaxK[CurrMember]) - continue; - else - CurrMember = FindNode(CurrMember + K); - - // Add an edge to the graph, so we can just do regular - // bitmap ior next time. It may also let us notice a cycle. -#if !FULL_UNIVERSAL - if (*Dest < NumberSpecialNodes) - continue; -#endif - if (GraphNodes[*Src].Edges->test_and_set(*Dest)) - if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo)) - NextWL->insert(&GraphNodes[*Dest]); - - } - li++; - } - } - SparseBitVector<> NewEdges; - SparseBitVector<> ToErase; - - // Now all we have left to do is propagate points-to info along the - // edges, erasing the redundant edges. - for (SparseBitVector<>::iterator bi = CurrNode->Edges->begin(); - bi != CurrNode->Edges->end(); - ++bi) { - - unsigned DestVar = *bi; - unsigned Rep = FindNode(DestVar); - - // If we ended up with this node as our destination, or we've already - // got an edge for the representative, delete the current edge. - if (Rep == CurrNodeIndex || - (Rep != DestVar && NewEdges.test(Rep))) { - ToErase.set(DestVar); - continue; - } - - std::pair<unsigned,unsigned> edge(CurrNodeIndex,Rep); - - // This is where we do lazy cycle detection. - // If this is a cycle candidate (equal points-to sets and this - // particular edge has not been cycle-checked previously), add to the - // list to check for cycles on the next iteration. 
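The cycle-candidate test described just above is small enough to show in isolation: an edge is only interesting when its endpoints already have identical points-to sets and it has not been checked before, and candidates are batched for one shared cycle-detection pass on the next iteration rather than being DFS'd immediately. A sketch with toy data:

    #include <cstdio>
    #include <queue>
    #include <set>
    #include <utility>

    int main() {
      std::set<unsigned> PointsTo[3] = {{1, 2}, {1, 2}, {2}};
      std::set<std::pair<unsigned, unsigned>> EdgesChecked;
      std::queue<unsigned> TarjanWL;
      auto MaybeQueue = [&](unsigned From, unsigned To) {
        std::pair<unsigned, unsigned> Edge(From, To);
        if (!EdgesChecked.count(Edge) && PointsTo[From] == PointsTo[To]) {
          EdgesChecked.insert(Edge);       // never re-check the same edge
          TarjanWL.push(To);               // batched cycle check next iteration
          std::printf("edge %u->%u queued as a cycle candidate\n", From, To);
        }
      };
      MaybeQueue(0, 1);   // identical points-to sets: queued
      MaybeQueue(0, 2);   // different sets: ignored
      MaybeQueue(0, 1);   // already checked: ignored
      std::printf("%zu candidate(s) queued\n", TarjanWL.size());
      return 0;
    }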
- if (!EdgesChecked.count(edge) && - *(GraphNodes[Rep].PointsTo) == *(CurrNode->PointsTo)) { - EdgesChecked.insert(edge); - TarjanWL.push(Rep); - } - // Union the points-to sets into the dest -#if !FULL_UNIVERSAL - if (Rep >= NumberSpecialNodes) -#endif - if (GraphNodes[Rep].PointsTo |= CurrPointsTo) { - NextWL->insert(&GraphNodes[Rep]); - } - // If this edge's destination was collapsed, rewrite the edge. - if (Rep != DestVar) { - ToErase.set(DestVar); - NewEdges.set(Rep); - } - } - CurrNode->Edges->intersectWithComplement(ToErase); - CurrNode->Edges |= NewEdges; - } - - // Switch to other work list. - WorkList* t = CurrWL; CurrWL = NextWL; NextWL = t; - } - - - Node2DFS.clear(); - Node2Deleted.clear(); - for (unsigned i = 0; i < GraphNodes.size(); ++i) { - Node *N = &GraphNodes[i]; - delete N->OldPointsTo; - delete N->Edges; - } - SDTActive = false; - SDT.clear(); -} - -//===----------------------------------------------------------------------===// -// Union-Find -//===----------------------------------------------------------------------===// - -// Unite nodes First and Second, returning the one which is now the -// representative node. First and Second are indexes into GraphNodes -unsigned Andersens::UniteNodes(unsigned First, unsigned Second, - bool UnionByRank) { - assert (First < GraphNodes.size() && Second < GraphNodes.size() && - "Attempting to merge nodes that don't exist"); - - Node *FirstNode = &GraphNodes[First]; - Node *SecondNode = &GraphNodes[Second]; - - assert (SecondNode->isRep() && FirstNode->isRep() && - "Trying to unite two non-representative nodes!"); - if (First == Second) - return First; - - if (UnionByRank) { - int RankFirst = (int) FirstNode ->NodeRep; - int RankSecond = (int) SecondNode->NodeRep; - - // Rank starts at -1 and gets decremented as it increases. - // Translation: higher rank, lower NodeRep value, which is always negative. - if (RankFirst > RankSecond) { - unsigned t = First; First = Second; Second = t; - Node* tp = FirstNode; FirstNode = SecondNode; SecondNode = tp; - } else if (RankFirst == RankSecond) { - FirstNode->NodeRep = (unsigned) (RankFirst - 1); - } - } - - SecondNode->NodeRep = First; -#if !FULL_UNIVERSAL - if (First >= NumberSpecialNodes) -#endif - if (FirstNode->PointsTo && SecondNode->PointsTo) - FirstNode->PointsTo |= *(SecondNode->PointsTo); - if (FirstNode->Edges && SecondNode->Edges) - FirstNode->Edges |= *(SecondNode->Edges); - if (!SecondNode->Constraints.empty()) - FirstNode->Constraints.splice(FirstNode->Constraints.begin(), - SecondNode->Constraints); - if (FirstNode->OldPointsTo) { - delete FirstNode->OldPointsTo; - FirstNode->OldPointsTo = new SparseBitVector<>; - } - - // Destroy interesting parts of the merged-from node. 
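UniteNodes and FindNode are a standard union-find with union by rank and path compression, plus the points-to bookkeeping. Separated from that bookkeeping, the structure is just the following; the pass packs the rank into NodeRep as a negative value, whereas this sketch keeps it in its own array.

    #include <cstdio>
    #include <utility>
    #include <vector>

    static std::vector<unsigned> Parent;
    static std::vector<unsigned> Rank;

    // Find the representative of N, compressing the path along the way.
    static unsigned Find(unsigned N) {
      return Parent[N] == N ? N : (Parent[N] = Find(Parent[N]));
    }

    // Unite the sets of A and B by rank; returns the surviving representative.
    static unsigned Unite(unsigned A, unsigned B) {
      A = Find(A); B = Find(B);
      if (A == B) return A;
      if (Rank[A] < Rank[B]) std::swap(A, B);
      if (Rank[A] == Rank[B]) ++Rank[A];
      Parent[B] = A;
      return A;
    }

    int main() {
      Parent = {0, 1, 2, 3};
      Rank.assign(4, 0);
      Unite(0, 1);
      Unite(2, 3);
      Unite(1, 3);
      for (unsigned i = 0; i < 4; ++i)
        std::printf("node %u -> rep %u\n", i, Find(i));   // all collapse to one rep
      return 0;
    }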
- delete SecondNode->OldPointsTo; - delete SecondNode->Edges; - delete SecondNode->PointsTo; - SecondNode->Edges = NULL; - SecondNode->PointsTo = NULL; - SecondNode->OldPointsTo = NULL; - - NumUnified++; - DEBUG(dbgs() << "Unified Node "); - DEBUG(PrintNode(FirstNode)); - DEBUG(dbgs() << " and Node "); - DEBUG(PrintNode(SecondNode)); - DEBUG(dbgs() << "\n"); - - if (SDTActive) - if (SDT[Second] >= 0) { - if (SDT[First] < 0) - SDT[First] = SDT[Second]; - else { - UniteNodes( FindNode(SDT[First]), FindNode(SDT[Second]) ); - First = FindNode(First); - } - } - - return First; -} - -// Find the index into GraphNodes of the node representing Node, performing -// path compression along the way -unsigned Andersens::FindNode(unsigned NodeIndex) { - assert (NodeIndex < GraphNodes.size() - && "Attempting to find a node that can't exist"); - Node *N = &GraphNodes[NodeIndex]; - if (N->isRep()) - return NodeIndex; - else - return (N->NodeRep = FindNode(N->NodeRep)); -} - -// Find the index into GraphNodes of the node representing Node, -// don't perform path compression along the way (for Print) -unsigned Andersens::FindNode(unsigned NodeIndex) const { - assert (NodeIndex < GraphNodes.size() - && "Attempting to find a node that can't exist"); - const Node *N = &GraphNodes[NodeIndex]; - if (N->isRep()) - return NodeIndex; - else - return FindNode(N->NodeRep); -} - -//===----------------------------------------------------------------------===// -// Debugging Output -//===----------------------------------------------------------------------===// - -void Andersens::PrintNode(const Node *N) const { - if (N == &GraphNodes[UniversalSet]) { - dbgs() << "<universal>"; - return; - } else if (N == &GraphNodes[NullPtr]) { - dbgs() << "<nullptr>"; - return; - } else if (N == &GraphNodes[NullObject]) { - dbgs() << "<null>"; - return; - } - if (!N->getValue()) { - dbgs() << "artificial" << (intptr_t) N; - return; - } - - assert(N->getValue() != 0 && "Never set node label!"); - Value *V = N->getValue(); - if (Function *F = dyn_cast<Function>(V)) { - if (isa<PointerType>(F->getFunctionType()->getReturnType()) && - N == &GraphNodes[getReturnNode(F)]) { - dbgs() << F->getName() << ":retval"; - return; - } else if (F->getFunctionType()->isVarArg() && - N == &GraphNodes[getVarargNode(F)]) { - dbgs() << F->getName() << ":vararg"; - return; - } - } - - if (Instruction *I = dyn_cast<Instruction>(V)) - dbgs() << I->getParent()->getParent()->getName() << ":"; - else if (Argument *Arg = dyn_cast<Argument>(V)) - dbgs() << Arg->getParent()->getName() << ":"; - - if (V->hasName()) - dbgs() << V->getName(); - else - dbgs() << "(unnamed)"; - - if (isa<GlobalValue>(V) || isa<AllocaInst>(V) || isMalloc(V)) - if (N == &GraphNodes[getObject(V)]) - dbgs() << "<mem>"; -} -void Andersens::PrintConstraint(const Constraint &C) const { - if (C.Type == Constraint::Store) { - dbgs() << "*"; - if (C.Offset != 0) - dbgs() << "("; - } - PrintNode(&GraphNodes[C.Dest]); - if (C.Type == Constraint::Store && C.Offset != 0) - dbgs() << " + " << C.Offset << ")"; - dbgs() << " = "; - if (C.Type == Constraint::Load) { - dbgs() << "*"; - if (C.Offset != 0) - dbgs() << "("; - } - else if (C.Type == Constraint::AddressOf) - dbgs() << "&"; - PrintNode(&GraphNodes[C.Src]); - if (C.Offset != 0 && C.Type != Constraint::Store) - dbgs() << " + " << C.Offset; - if (C.Type == Constraint::Load && C.Offset != 0) - dbgs() << ")"; - dbgs() << "\n"; -} - -void Andersens::PrintConstraints() const { - dbgs() << "Constraints:\n"; - - for (unsigned i = 0, e = 
Constraints.size(); i != e; ++i) - PrintConstraint(Constraints[i]); -} - -void Andersens::PrintPointsToGraph() const { - dbgs() << "Points-to graph:\n"; - for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) { - const Node *N = &GraphNodes[i]; - if (FindNode(i) != i) { - PrintNode(N); - dbgs() << "\t--> same as "; - PrintNode(&GraphNodes[FindNode(i)]); - dbgs() << "\n"; - } else { - dbgs() << "[" << (N->PointsTo->count()) << "] "; - PrintNode(N); - dbgs() << "\t--> "; - - bool first = true; - for (SparseBitVector<>::iterator bi = N->PointsTo->begin(); - bi != N->PointsTo->end(); - ++bi) { - if (!first) - dbgs() << ", "; - PrintNode(&GraphNodes[*bi]); - first = false; - } - dbgs() << "\n"; - } - } -} diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index 1ebb0be..007ad22 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMipa - Andersens.cpp CallGraph.cpp CallGraphSCCPass.cpp FindUsedTypes.cpp diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index ec94bc8..7b43089 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -213,7 +213,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { ++NumNonAddrTakenGlobalVars; // If this global holds a pointer type, see if it is an indirect global. - if (isa<PointerType>(I->getType()->getElementType()) && + if (I->getType()->getElementType()->isPointerTy() && AnalyzeIndirectGlobalMemory(I)) ++NumIndirectGlobalVars; } @@ -231,7 +231,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, std::vector<Function*> &Writers, GlobalValue *OkayStoreDest) { - if (!isa<PointerType>(V->getType())) return true; + if (!V->getType()->isPointerTy()) return true; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) { diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 9c472ae..98a436f 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Assembly/AsmAnnotationWriter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -35,42 +36,30 @@ Pass *llvm::createIVUsersPass() { return new IVUsers(); } -/// containsAddRecFromDifferentLoop - Determine whether expression S involves a -/// subexpression that is an AddRec from a loop other than L. An outer loop -/// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) { - // This is very common, put it first. - if (isa<SCEVConstant>(S)) - return false; - if (const SCEVCommutativeExpr *AE = dyn_cast<SCEVCommutativeExpr>(S)) { - for (unsigned int i=0; i< AE->getNumOperands(); i++) - if (containsAddRecFromDifferentLoop(AE->getOperand(i), L)) - return true; - return false; - } - if (const SCEVAddRecExpr *AE = dyn_cast<SCEVAddRecExpr>(S)) { - if (const Loop *newLoop = AE->getLoop()) { - if (newLoop == L) - return false; - // if newLoop is an outer loop of L, this is OK. - if (newLoop->contains(L)) - return false; +/// CollectSubexprs - Split S into subexpressions which can be pulled out into +/// separate registers. 
+static void CollectSubexprs(const SCEV *S, + SmallVectorImpl<const SCEV *> &Ops, + ScalarEvolution &SE) { + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + // Break out add operands. + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + CollectSubexprs(*I, Ops, SE); + return; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Split a non-zero base out of an addrec. + if (!AR->getStart()->isZero()) { + CollectSubexprs(AR->getStart(), Ops, SE); + CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), Ops, SE); + return; } - return true; } - if (const SCEVUDivExpr *DE = dyn_cast<SCEVUDivExpr>(S)) - return containsAddRecFromDifferentLoop(DE->getLHS(), L) || - containsAddRecFromDifferentLoop(DE->getRHS(), L); -#if 0 - // SCEVSDivExpr has been backed out temporarily, but will be back; we'll - // need this when it is. - if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S)) - return containsAddRecFromDifferentLoop(DE->getLHS(), L) || - containsAddRecFromDifferentLoop(DE->getRHS(), L); -#endif - if (const SCEVCastExpr *CE = dyn_cast<SCEVCastExpr>(S)) - return containsAddRecFromDifferentLoop(CE->getOperand(), L); - return false; + + // Otherwise use the value itself. + Ops.push_back(S); } /// getSCEVStartAndStride - Compute the start and stride of this expression, @@ -89,35 +78,42 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) if (const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) { - if (AddRec->getLoop() == L) - TheAddRec = SE->getAddExpr(AddRec, TheAddRec); - else - return false; // Nested IV of some sort? - } else { + dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) + TheAddRec = SE->getAddExpr(AddRec, TheAddRec); + else Start = SE->getAddExpr(Start, AE->getOperand(i)); - } } else if (isa<SCEVAddRecExpr>(SH)) { TheAddRec = SH; } else { return false; // not analyzable. } - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(TheAddRec); - if (!AddRec || AddRec->getLoop() != L) return false; + // Break down TheAddRec into its component parts. + SmallVector<const SCEV *, 4> Subexprs; + CollectSubexprs(TheAddRec, Subexprs, *SE); + + // Look for an addrec on the current loop among the parts. + const SCEV *AddRecStride = 0; + for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(), + E = Subexprs.end(); I != E; ++I) { + const SCEV *S = *I; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + if (AR->getLoop() == L) { + *I = AR->getStart(); + AddRecStride = AR->getStepRecurrence(*SE); + break; + } + } + if (!AddRecStride) + return false; + + // Add up everything else into a start value (which may not be + // loop-invariant). + const SCEV *AddRecStart = SE->getAddExpr(Subexprs); // Use getSCEVAtScope to attempt to simplify other loops out of // the picture. - const SCEV *AddRecStart = AddRec->getStart(); AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - const SCEV *AddRecStride = AddRec->getStepRecurrence(*SE); - - // FIXME: If Start contains an SCEVAddRecExpr from a different loop, other - // than an outer loop of the current loop, reject it. LSR has no concept of - // operating on more than one loop at a time so don't confuse it with such - // expressions. 
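The new CollectSubexprs-based logic above rewrites an affine IV expression so that the current loop's step becomes the stride and every other subexpression folds into the (possibly loop-variant) start. That the split preserves the value is easy to check on plain integers; the following is a toy numeric illustration, not SCEV code.

    #include <cstdio>

    int main() {
      // An IV expression of the form  a + {b,+,step}<L>  evaluated at iteration i.
      long a = 10, b = 3, step = 4;
      for (long i = 0; i < 3; ++i) {
        long original  = a + (b + step * i);   // a + {b,+,step}<L>
        long start     = a + b;                // collected non-addrec subexpressions
        long rewritten = start + step * i;     // start + {0,+,step}<L>
        std::printf("i=%ld: original=%ld rewritten=%ld\n", i, original, rewritten);
      }
      return 0;
    }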
- if (containsAddRecFromDifferentLoop(AddRecStart, L)) - return false; Start = SE->getAddExpr(Start, AddRecStart); @@ -130,7 +126,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, DEBUG(dbgs() << "["; WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); - dbgs() << "] Variable stride: " << *AddRec << "\n"); + dbgs() << "] Variable stride: " << *AddRecStride << "\n"); } Stride = AddRecStride; @@ -146,8 +142,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, /// the loop, resulting in reg-reg copies (if we use the pre-inc value when we /// should use the post-inc value). static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - Loop *L, LoopInfo *LI, DominatorTree *DT, - Pass *P) { + Loop *L, DominatorTree *DT) { // If the user is in the loop, use the preinc value. if (L->contains(User)) return false; @@ -227,7 +222,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { // Descend recursively, but not into PHI nodes outside the current loop. // It's important to see the entire expression outside the loop to get // choices that depend on addressing mode use right, although we won't - // consider references ouside the loop in all cases. + // consider references outside the loop in all cases. // If User is already in Processed, we don't want to recurse into it again, // but do want to record a second reference in the same instruction. bool AddUserToIVUsers = false; @@ -246,42 +241,28 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { } if (AddUserToIVUsers) { - IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride]; - if (!StrideUses) { // First occurrence of this stride? - StrideOrder.push_back(Stride); - StrideUses = new IVUsersOfOneStride(Stride); - IVUses.push_back(StrideUses); - IVUsesByStride[Stride] = StrideUses; - } - // Okay, we found a user that we cannot reduce. Analyze the instruction // and decide what to do with it. If we are a use inside of the loop, use // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) { + if (IVUseShouldUsePostIncValue(User, I, L, DT)) { // The value used will be incremented by the stride more than we are // expecting, so subtract this off. const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); - StrideUses->addUser(NewStart, User, I); - StrideUses->Users.back().setIsUseOfPostIncrementedValue(true); + IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I)); + IVUses.back().setIsUseOfPostIncrementedValue(true); DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); } else { - StrideUses->addUser(Start, User, I); + IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I)); } } } return true; } -void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, - Instruction *User, Value *Operand) { - IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride]; - if (!StrideUses) { // First occurrence of this stride? 
- StrideOrder.push_back(Stride); - StrideUses = new IVUsersOfOneStride(Stride); - IVUses.push_back(StrideUses); - IVUsesByStride[Stride] = StrideUses; - } - IVUsesByStride[Stride]->addUser(Offset, User, Operand); +IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, + Instruction *User, Value *Operand) { + IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand)); + return IVUses.back(); } IVUsers::IVUsers() @@ -315,15 +296,15 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// value of the OperandValToReplace of the given IVStrideUse. const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); + RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); // Add the offset in a separate step, because it may be loop-variant. RetVal = SE->getAddExpr(RetVal, U.getOffset()); // For uses of post-incremented values, add an extra stride to compute // the actual replacement value. if (U.isUseOfPostIncrementedValue()) - RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride); + RetVal = SE->getAddExpr(RetVal, U.getStride()); return RetVal; } @@ -332,9 +313,9 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { /// isUseOfPostIncrementedValue flag. const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const { // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType()); + const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getParent()->Stride, L); + RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); // Add the offset in a separate step, because it may be loop-variant. RetVal = SE->getAddExpr(RetVal, U.getOffset()); return RetVal; @@ -349,24 +330,20 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { } OS << ":\n"; - for (unsigned Stride = 0, e = StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride*>::const_iterator SI = - IVUsesByStride.find(StrideOrder[Stride]); - assert(SI != IVUsesByStride.end() && "Stride doesn't exist!"); - OS << " Stride " << *SI->first->getType() << " " << *SI->first << ":\n"; - - for (ilist<IVStrideUse>::const_iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; ++UI) { - OS << " "; - WriteAsOperand(OS, UI->getOperandValToReplace(), false); - OS << " = "; - OS << *getReplacementExpr(*UI); - if (UI->isUseOfPostIncrementedValue()) - OS << " (post-inc)"; - OS << " in "; - UI->getUser()->print(OS); - OS << '\n'; - } + // Use a default AssemblyAnnotationWriter to suppress the default info + // comments, which aren't relevant here. 
+ AssemblyAnnotationWriter Annotator; + for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(), + E = IVUses.end(); UI != E; ++UI) { + OS << " "; + WriteAsOperand(OS, UI->getOperandValToReplace(), false); + OS << " = " + << *getReplacementExpr(*UI); + if (UI->isUseOfPostIncrementedValue()) + OS << " (post-inc)"; + OS << " in "; + UI->getUser()->print(OS, &Annotator); + OS << '\n'; } } @@ -375,37 +352,12 @@ void IVUsers::dump() const { } void IVUsers::releaseMemory() { - IVUsesByStride.clear(); - StrideOrder.clear(); Processed.clear(); IVUses.clear(); } void IVStrideUse::deleted() { // Remove this user from the list. - Parent->Users.erase(this); + Parent->IVUses.erase(this); // this now dangles! } - -void IVUsersOfOneStride::print(raw_ostream &OS) const { - OS << "IV Users of one stride:\n"; - - if (Stride) - OS << " Stride: " << *Stride << '\n'; - - OS << " Users:\n"; - - unsigned Count = 1; - - for (ilist<IVStrideUse>::const_iterator - I = Users.begin(), E = Users.end(); I != E; ++I) { - const IVStrideUse &SU = *I; - OS << " " << Count++ << '\n'; - OS << " Offset: " << *SU.getOffset() << '\n'; - OS << " Instr: " << *SU << '\n'; - } -} - -void IVUsersOfOneStride::dump() const { - print(dbgs()); -} diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 972d034..ca50a17 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -84,7 +84,7 @@ unsigned InlineCostAnalyzer::FunctionInfo:: // unsigned InlineCostAnalyzer::FunctionInfo:: CountCodeReductionForAlloca(Value *V) { - if (!isa<PointerType>(V->getType())) return 0; // Not a pointer + if (!V->getType()->isPointerTy()) return 0; // Not a pointer unsigned Reduction = 0; for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ Instruction *I = cast<Instruction>(*UI); @@ -175,7 +175,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { this->usesDynamicAlloca = true; } - if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType())) + if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) ++NumVectorInsts; if (const CastInst *CI = dyn_cast<CastInst>(II)) { diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index b53ac13..8288e96 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -194,11 +194,10 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Type *ITy = GetCompareTy(LHS); // icmp X, X -> true/false - if (LHS == RHS) + // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false + // because X could be 0. + if (LHS == RHS || isa<UndefValue>(RHS)) return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); - - if (isa<UndefValue>(RHS)) // X icmp undef -> undef - return UndefValue::get(ITy); // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value // addresses never equal each other! We already know that Op0 != Op1. @@ -283,6 +282,32 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, // True if unordered. return ConstantInt::getTrue(CFP->getContext()); } + // Check whether the constant is an infinity. + if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + switch (Pred) { + case FCmpInst::FCMP_OLT: + // No value is ordered and less than negative infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_UGE: + // All values are unordered with or at least negative infinity. 
+ return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } else { + switch (Pred) { + case FCmpInst::FCMP_OGT: + // No value is ordered and greater than infinity. + return ConstantInt::getFalse(CFP->getContext()); + case FCmpInst::FCMP_ULE: + // All values are unordered with and at most infinity. + return ConstantInt::getTrue(CFP->getContext()); + default: + break; + } + } + } } } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 2d74709d..2aa2f17 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -580,7 +580,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { void MemoryDependenceAnalysis:: getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB, SmallVectorImpl<NonLocalDepResult> &Result) { - assert(isa<PointerType>(Pointer->getType()) && + assert(Pointer->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); @@ -861,7 +861,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize, // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. PHITransAddr PredPointer(Pointer); - PredPointer.PHITranslateValue(BB, Pred); + PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); @@ -1009,13 +1009,20 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { /// in more places that cached info does not necessarily keep. void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) { // If Ptr isn't really a pointer, just ignore it. - if (!isa<PointerType>(Ptr->getType())) return; + if (!Ptr->getType()->isPointerTy()) return; // Flush store info for the pointer. RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false)); // Flush load info for the pointer. RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true)); } +/// invalidateCachedPredecessors - Clear the PredIteratorCache info. +/// This needs to be done when the CFG changes, e.g., due to splitting +/// critical edges. +void MemoryDependenceAnalysis::invalidateCachedPredecessors() { + PredCache->clear(); +} + /// removeInstruction - Remove an instruction from the dependence analysis, /// updating the dependence of instructions that previously depended on it. /// This method attempts to keep the cache coherent using the reverse map. @@ -1050,7 +1057,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Remove it from both the load info and the store info. The instruction // can't be in either of these maps if it is non-pointer. - if (isa<PointerType>(RemInst->getType())) { + if (RemInst->getType()->isPointerTy()) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); } diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 334a188..8e4fa03 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -134,7 +134,8 @@ static void RemoveInstInputs(Value *V, } Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, - BasicBlock *PredBB) { + BasicBlock *PredBB, + const DominatorTree *DT) { // If this is a non-instruction value, it can't require PHI translation. 
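A quick numeric sanity check of the infinity folds added above in SimplifyFCmpInst, written with plain doubles purely for illustration: no value is ordered and less than -inf, so "fcmp olt x, -inf" folds to false, and every value, NaN included, is unordered with or at least -inf, so "fcmp uge x, -inf" folds to true; the +inf cases are the mirror image.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double NegInf = -std::numeric_limits<double>::infinity();
  const double Samples[] = {0.0, -1.5, 1e300, NegInf,
                            std::numeric_limits<double>::infinity(),
                            std::numeric_limits<double>::quiet_NaN()};

  for (double X : Samples) {
    // olt: ordered and less-than. No value is ordered and below -inf.
    bool OLT = !std::isnan(X) && X < NegInf;
    assert(!OLT);

    // uge: unordered or greater-or-equal. Always true against -inf.
    bool UGE = std::isnan(X) || X >= NegInf;
    assert(UGE);
  }
  return 0;
}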
Instruction *Inst = dyn_cast<Instruction>(V); if (Inst == 0) return V; @@ -177,7 +178,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // operands need to be phi translated, and if so, reconstruct it. if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) { - Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB); + Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB, DT); if (PHIIn == 0) return 0; if (PHIIn == BC->getOperand(0)) return BC; @@ -193,7 +194,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); UI != E; ++UI) { if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) - if (BCI->getType() == BC->getType()) + if (BCI->getType() == BC->getType() && + (!DT || DT->dominates(BCI->getParent(), PredBB))) return BCI; } return 0; @@ -204,7 +206,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, SmallVector<Value*, 8> GEPOps; bool AnyChanged = false; for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { - Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB); + Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); if (GEPOp == 0) return 0; AnyChanged |= GEPOp != GEP->getOperand(i); @@ -229,7 +231,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI)) if (GEPI->getType() == GEP->getType() && GEPI->getNumOperands() == GEPOps.size() && - GEPI->getParent()->getParent() == CurBB->getParent()) { + GEPI->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(GEPI->getParent(), PredBB))) { bool Mismatch = false; for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) if (GEPI->getOperand(i) != GEPOps[i]) { @@ -251,7 +254,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap(); bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap(); - Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB); + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); if (LHS == 0) return 0; // If the PHI translated LHS is an add of a constant, fold the immediates. @@ -287,7 +290,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI)) if (BO->getOpcode() == Instruction::Add && BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && - BO->getParent()->getParent() == CurBB->getParent()) + BO->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(BO->getParent(), PredBB))) return BO; } @@ -300,33 +304,24 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, /// PHITranslateValue - PHI translate the current address up the CFG from -/// CurBB to Pred, updating our state the reflect any needed changes. This -/// returns true on failure and sets Addr to null. -bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB) { +/// CurBB to Pred, updating our state to reflect any needed changes. If the +/// dominator tree DT is non-null, the translated value must dominate +/// PredBB. This returns true on failure and sets Addr to null. 
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT) { assert(Verify() && "Invalid PHITransAddr!"); - Addr = PHITranslateSubExpr(Addr, CurBB, PredBB); + Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT); assert(Verify() && "Invalid PHITransAddr!"); - return Addr == 0; -} -/// GetAvailablePHITranslatedSubExpr - Return the value computed by -/// PHITranslateSubExpr if it dominates PredBB, otherwise return null. -Value *PHITransAddr:: -GetAvailablePHITranslatedSubExpr(Value *V, BasicBlock *CurBB,BasicBlock *PredBB, - const DominatorTree &DT) const { - PHITransAddr Tmp(V, TD); - Tmp.PHITranslateValue(CurBB, PredBB); - - // See if PHI translation succeeds. - V = Tmp.getAddr(); - - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null<Instruction>(V)) - if (!DT.dominates(Inst->getParent(), PredBB)) - return 0; - return V; -} + if (DT) { + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr)) + if (!DT->dominates(Inst->getParent(), PredBB)) + Addr = 0; + } + return Addr == 0; +} /// PHITranslateWithInsertion - PHI translate this value into the specified /// predecessor block, inserting a computation of the value if it is @@ -365,8 +360,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, SmallVectorImpl<Instruction*> &NewInsts) { // See if we have a version of this value already available and dominating // PredBB. If so, there is no need to insert a new instance of it. - if (Value *Res = GetAvailablePHITranslatedSubExpr(InVal, CurBB, PredBB, DT)) - return Res; + PHITransAddr Tmp(InVal, TD); + if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) + return Tmp.getAddr(); // If we don't have an available version of this value, it must be an // instruction. diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp index 8da07e7..ce7ac89 100644 --- a/lib/Analysis/PointerTracking.cpp +++ b/lib/Analysis/PointerTracking.cpp @@ -231,7 +231,7 @@ void PointerTracking::print(raw_ostream &OS, const Module* M) const { // this should be safe for the same reason its safe for SCEV. 
PointerTracking &PT = *const_cast<PointerTracking*>(this); for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) { - if (!isa<PointerType>(I->getType())) + if (!I->getType()->isPointerTy()) continue; Value *Base; const SCEV *Limit, *Offset; diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 85531be..66760c6 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -246,7 +246,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>:: succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); if (Succ == End) { - P[0] = BB; + P[ reinterpret_cast<const llvm::BasicBlock*>(0) ] = BB; if (Mode & GetPathToExit) { hasFoundPath = true; BB = 0; @@ -753,10 +753,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { Succ != End; ++Succ) { Path P; GetPath(*Succ, 0, P, GetPathToExit); - if (Dest && Dest != P[0]) { + if (Dest && Dest != P[ reinterpret_cast<const llvm::BasicBlock*>(0) ]) { AllEdgesHaveSameReturn = false; } - Dest = P[0]; + Dest = P[ reinterpret_cast<const llvm::BasicBlock*>(0) ]; } if (AllEdgesHaveSameReturn) { if(EstimateMissingEdges(BB)) { @@ -928,7 +928,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { Path P; const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); - Dest = P[0]; + Dest = P[ reinterpret_cast<const llvm::BasicBlock*>(0) ]; if (!Dest) continue; if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 82be9cd..b979f33 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -214,8 +214,8 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { - assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate non-integer value!"); } @@ -226,8 +226,8 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const { SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { - assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot zero extend non-integer value!"); } @@ -238,8 +238,8 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const { SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { - assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot sign extend non-integer value!"); } @@ -312,6 +312,21 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const { return true; } +bool +SCEVAddRecExpr::dominates(BasicBlock *BB, DominatorTree *DT) const { + return DT->dominates(L->getHeader(), BB) && + SCEVNAryExpr::dominates(BB, DT); +} + +bool +SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { + // This 
uses a "dominates" query instead of "properly dominates" query because + // the instruction which produces the addrec's value is a PHI, and a PHI + // effectively properly dominates its entire containing block. + return DT->dominates(L->getHeader(), BB) && + SCEVNAryExpr::properlyDominates(BB, DT); +} + void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "{" << *Operands[0]; for (unsigned i = 1, e = Operands.size(); i != e; ++i) @@ -379,7 +394,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) if (CI->isOne() && STy->getNumElements() == 2 && - STy->getElementType(0)->isInteger(1)) { + STy->getElementType(0)->isIntegerTy(1)) { AllocTy = STy->getElementType(1); return true; } @@ -401,7 +416,7 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); // Ignore vector types here so that ScalarEvolutionExpander doesn't // emit getelementptrs that index into vectors. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) { + if (Ty->isStructTy() || Ty->isArrayTy()) { CTy = Ty; FieldNo = CE->getOperand(2); return true; @@ -503,9 +518,9 @@ namespace { // Order pointer values after integer values. This helps SCEVExpander // form GEPs. - if (isa<PointerType>(LU->getType()) && !isa<PointerType>(RU->getType())) + if (LU->getType()->isPointerTy() && !RU->getType()->isPointerTy()) return false; - if (isa<PointerType>(RU->getType()) && !isa<PointerType>(LU->getType())) + if (RU->getType()->isPointerTy() && !LU->getType()->isPointerTy()) return true; // Compare getValueID values. @@ -601,7 +616,7 @@ namespace { /// When this routine is finished, we know that any duplicates in the vector are /// consecutive and that complexity is monotonically increasing. /// -/// Note that we go take special precautions to ensure that we get determinstic +/// Note that we go take special precautions to ensure that we get deterministic /// results from this routine. In other words, we don't want the results of /// this to depend on where the addresses of various SCEV objects happened to /// land in memory. @@ -729,7 +744,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, // We need at least W + T bits for the multiplication step unsigned CalculationBits = W + T; - // Calcuate 2^T, at width T+W. + // Calculate 2^T, at width T+W. APInt DivFactor = APInt(CalculationBits, 1).shl(T); // Calculate the multiplicative inverse of K! / 2^T; @@ -906,9 +921,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, if (MaxBECount == RecastedMaxBECount) { const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no unsigned overflow. - const SCEV *ZMul = - getMulExpr(CastedMaxBECount, - getTruncateOrZeroExtend(Step, Start->getType())); + const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); const SCEV *Add = getAddExpr(Start, ZMul); const SCEV *OperandExtendedAdd = getAddExpr(getZeroExtendExpr(Start, WideTy), @@ -922,9 +935,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // Similar to above, only this time treat the step value as signed. // This covers loops that count down. 
- const SCEV *SMul = - getMulExpr(CastedMaxBECount, - getTruncateOrSignExtend(Step, Start->getType())); + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); Add = getAddExpr(Start, SMul); OperandExtendedAdd = getAddExpr(getZeroExtendExpr(Start, WideTy), @@ -1045,9 +1056,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, if (MaxBECount == RecastedMaxBECount) { const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); // Check whether Start+Step*MaxBECount has no signed overflow. - const SCEV *SMul = - getMulExpr(CastedMaxBECount, - getTruncateOrSignExtend(Step, Start->getType())); + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); const SCEV *Add = getAddExpr(Start, SMul); const SCEV *OperandExtendedAdd = getAddExpr(getSignExtendExpr(Start, WideTy), @@ -1061,9 +1070,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. - const SCEV *UMul = - getMulExpr(CastedMaxBECount, - getTruncateOrZeroExtend(Step, Start->getType())); + const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); Add = getAddExpr(Start, UMul); OperandExtendedAdd = getAddExpr(getSignExtendExpr(Start, WideTy), @@ -1403,7 +1410,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, // If we deleted at least one add, we added operands to the end of the list, // and they are not necessarily sorted. Recurse to resort and resimplify - // any operands we just aquired. + // any operands we just acquired. if (DeletedAdd) return getAddExpr(Ops); } @@ -1710,7 +1717,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // If we deleted at least one mul, we added operands to the end of the list, // and they are not necessarily sorted. Recurse to resort and resimplify - // any operands we just aquired. + // any operands we just acquired. if (DeletedMul) return getMulExpr(Ops); } @@ -1958,6 +1965,12 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X } + // It's tempting to want to call getMaxBackedgeTakenCount count here and + // use that information to infer NUW and NSW flags. However, computing a + // BE count requires calling getAddRecExpr, so we may not yet have a + // meaningful BE count at this point (and if we don't, we'd be stuck + // with a SCEVCouldNotCompute as the cached BE count). + // If HasNSW is true and all the operands are non-negative, infer HasNUW. if (!HasNUW && HasNSW) { bool All = true; @@ -2293,7 +2306,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { /// has access to target-specific information. bool ScalarEvolution::isSCEVable(const Type *Ty) const { // Integers and pointers are always SCEVable. - return Ty->isInteger() || isa<PointerType>(Ty); + return Ty->isIntegerTy() || Ty->isPointerTy(); } /// getTypeSizeInBits - Return the size in bits of the specified type, @@ -2306,12 +2319,12 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { return TD->getTypeSizeInBits(Ty); // Integer types have fixed sizes. - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Ty->getPrimitiveSizeInBits(); // The only other support type is pointer. Without TargetData, conservatively // assume pointers are 64-bit. 
- assert(isa<PointerType>(Ty) && "isSCEVable permitted a non-SCEVable type!"); + assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!"); return 64; } @@ -2322,11 +2335,11 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const { const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Ty; // The only other support type is pointer. - assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!"); + assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); if (TD) return TD->getIntPtrType(getContext()); // Without TargetData, conservatively assume pointers are 64-bit. @@ -2397,8 +2410,8 @@ const SCEV * ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion @@ -2414,8 +2427,8 @@ const SCEV * ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or zero extend with non-integer arguments!"); if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) return V; // No conversion @@ -2430,8 +2443,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, const SCEV * ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or zero extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrZeroExtend cannot truncate!"); @@ -2446,8 +2459,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { const SCEV * ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or sign extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrSignExtend cannot truncate!"); @@ -2463,8 +2476,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { const SCEV * ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot noop or any extend with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && "getNoopOrAnyExtend cannot truncate!"); @@ -2478,8 +2491,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, 
const Type *Ty) { const SCEV * ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { const Type *SrcTy = V->getType(); - assert((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && - (Ty->isInteger() || isa<PointerType>(Ty)) && + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && "Cannot truncate or noop with non-integer arguments!"); assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && "getTruncateOrNoop cannot extend!"); @@ -2536,12 +2549,12 @@ PushDefUseChildren(Instruction *I, /// the Scalars map if they reference SymName. This is used during PHI /// resolution. void -ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) { +ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { SmallVector<Instruction *, 16> Worklist; - PushDefUseChildren(I, Worklist); + PushDefUseChildren(PN, Worklist); SmallPtrSet<Instruction *, 8> Visited; - Visited.insert(I); + Visited.insert(PN); while (!Worklist.empty()) { Instruction *I = Worklist.pop_back_val(); if (!Visited.insert(I)) continue; @@ -2551,16 +2564,19 @@ ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) { if (It != Scalars.end()) { // Short-circuit the def-use traversal if the symbolic name // ceases to appear in expressions. - if (!It->second->hasOperand(SymName)) + if (It->second != SymName && !It->second->hasOperand(SymName)) continue; // SCEVUnknown for a PHI either means that it has an unrecognized - // structure, or it's a PHI that's in the progress of being computed - // by createNodeForPHI. In the former case, additional loop trip - // count information isn't going to change anything. In the later - // case, createNodeForPHI will perform the necessary updates on its - // own when it gets to that point. - if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) { + // structure, it's a PHI that's in the progress of being computed + // by createNodeForPHI, or it's a single-value PHI. In the first case, + // additional loop trip count information isn't going to change anything. + // In the second case, createNodeForPHI will perform the necessary + // updates on its own when it gets to that point. In the third, we do + // want to forget the SCEVUnknown. + if (!isa<PHINode>(I) || + !isa<SCEVUnknown>(It->second) || + (I != PN && It->second == SymName)) { ValuesAtScopes.erase(It->second); Scalars.erase(It); } @@ -2683,9 +2699,21 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { return SymbolicName; } - // It's tempting to recognize PHIs with a unique incoming value, however - // this leads passes like indvars to break LCSSA form. Fortunately, such - // PHIs are rare, as instcombine zaps them. + // If the PHI has a single incoming value, follow that value, unless the + // PHI's incoming blocks are in a different loop, in which case doing so + // risks breaking LCSSA form. Instcombine would normally zap these, but + // it doesn't have DominatorTree information, so it may miss cases. + if (Value *V = PN->hasConstantValue(DT)) { + bool AllSameLoop = true; + Loop *PNLoop = LI->getLoopFor(PN->getParent()); + for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) { + AllSameLoop = false; + break; + } + if (AllSameLoop) + return getSCEV(V); + } // If it's not a loop phi, we can't handle it yet. 
return getUnknown(PN); @@ -2718,7 +2746,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { } else { // For an array, add the element offset, explicitly scaled. const SCEV *LocalOffset = getSCEV(Index); - // Getelementptr indicies are signed. + // Getelementptr indices are signed. LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy); // Lower "inbounds" GEPs to NSW arithmetic. LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI), @@ -2921,7 +2949,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Mask = APInt::getAllOnesValue(BitWidth); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD); @@ -3053,7 +3080,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // For a SCEVUnknown, ask ValueTracking. - if (!U->getValue()->getType()->isInteger() && !TD) + if (!U->getValue()->getType()->isIntegerTy() && !TD) return ConservativeResult; unsigned NS = ComputeNumSignBits(U->getValue(), TD); if (NS == 1) @@ -3193,7 +3220,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { const Type *Z0Ty = Z0->getType(); unsigned Z0TySize = getTypeSizeInBits(Z0Ty); - // If C is a low-bits mask, the zero extend is zerving to + // If C is a low-bits mask, the zero extend is serving to // mask off the high bits. Complement the operand and // re-apply the zext. if (APIntOps::isMask(Z0TySize, CI->getValue())) @@ -3378,7 +3405,7 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) { const ScalarEvolution::BackedgeTakenInfo & ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // Initially insert a CouldNotCompute for this loop. If the insertion - // succeeds, procede to actually compute a backedge-taken count and + // succeeds, proceed to actually compute a backedge-taken count and // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. @@ -3470,6 +3497,35 @@ void ScalarEvolution::forgetLoop(const Loop *L) { } } +/// forgetValue - This method should be called by the client when it has +/// changed a value in a way that may effect its value, or which may +/// disconnect it from a def-use chain linking it to a loop. +void ScalarEvolution::forgetValue(Value *V) { + Instruction *I = dyn_cast<Instruction>(V); + if (!I) return; + + // Drop information about expressions based on loop-header PHIs. + SmallVector<Instruction *, 16> Worklist; + Worklist.push_back(I); + + SmallPtrSet<Instruction *, 8> Visited; + while (!Worklist.empty()) { + I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + std::map<SCEVCallbackVH, const SCEV *>::iterator It = + Scalars.find(static_cast<Value *>(I)); + if (It != Scalars.end()) { + ValuesAtScopes.erase(It->second); + Scalars.erase(It); + if (PHINode *PN = dyn_cast<PHINode>(I)) + ConstantEvolutionLoopExitValue.erase(PN); + } + + PushDefUseChildren(I, Worklist); + } +} + /// ComputeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo @@ -3566,7 +3622,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L, return getCouldNotCompute(); } - // Procede to the next level to examine the exit condition expression. 
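forgetValue reuses the same def-use worklist walk ScalarEvolution already performs when forgetting loops; below is a minimal generic rendering of that pattern over an invented use graph and cache (all names are made up), showing how every value reachable from the changed one is visited once and dropped from the cache while unrelated entries survive.

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Toy def-use edges: value -> values that use it.
  std::map<std::string, std::vector<std::string>> Uses = {
      {"%x", {"%add", "%mul"}}, {"%add", {"%cmp"}}, {"%mul", {}}, {"%cmp", {}}};

  // Toy analysis cache, standing in for the Scalars map.
  std::set<std::string> Cached = {"%x", "%add", "%mul", "%cmp", "%unrelated"};

  // forgetValue("%x"): drop cached info for %x and everything reachable
  // from it through def-use chains, visiting each value at most once.
  std::vector<std::string> Worklist = {"%x"};
  std::set<std::string> Visited;
  while (!Worklist.empty()) {
    std::string V = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(V).second)
      continue;
    Cached.erase(V);
    for (const std::string &U : Uses[V])
      Worklist.push_back(U);
  }

  for (const std::string &V : Cached)
    std::cout << V << "\n"; // only %unrelated survives
  return 0;
}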
+ // Proceed to the next level to examine the exit condition expression. return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(), ExitBr->getSuccessor(0), ExitBr->getSuccessor(1)); @@ -3655,10 +3711,23 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, } // With an icmp, it may be feasible to compute an exact backedge-taken count. - // Procede to the next level to examine the icmp. + // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB); + // Check for a constant condition. These are normally stripped out by + // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to + // preserve the CFG and is temporarily leaving constant conditions + // in place. + if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { + if (L->contains(FBB) == !CI->getZExtValue()) + // The backedge is always taken. + return getCouldNotCompute(); + else + // The backedge is never taken. + return getIntegerSCEV(0, CI->getType()); + } + // If it's not an integer or pointer comparison then compute it the hard way. return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); } @@ -3682,14 +3751,10 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, // Handle common loops like: for (X = "string"; *X; ++X) if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { - const SCEV *ItCnt = + BackedgeTakenInfo ItCnt = ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond); - if (!isa<SCEVCouldNotCompute>(ItCnt)) { - unsigned BitWidth = getTypeSizeInBits(ItCnt->getType()); - return BackedgeTakenInfo(ItCnt, - isa<SCEVConstant>(ItCnt) ? ItCnt : - getConstant(APInt::getMaxValue(BitWidth)-1)); - } + if (ItCnt.hasAnyInfo()) + return ItCnt; } const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); @@ -3723,14 +3788,14 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - const SCEV *TC = HowFarToZero(getMinusSCEV(LHS, RHS), L); - if (!isa<SCEVCouldNotCompute>(TC)) return TC; + BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); + if (BTI.hasAnyInfo()) return BTI; break; } case ICmpInst::ICMP_EQ: { // while (X == Y) // Convert to: while (X-Y == 0) - const SCEV *TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); - if (!isa<SCEVCouldNotCompute>(TC)) return TC; + BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L); + if (BTI.hasAnyInfo()) return BTI; break; } case ICmpInst::ICMP_SLT: { @@ -3817,7 +3882,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV, /// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. -const SCEV * +ScalarEvolution::BackedgeTakenInfo ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( LoadInst *LI, Constant *RHS, @@ -3826,6 +3891,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( if (LI->isVolatile()) return getCouldNotCompute(); // Check to see if the loaded pointer is a getelementptr of a global. + // TODO: Use SCEV instead of manually grubbing with GEPs. 
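A small sketch of the constant-exit-condition case added above; the helper and its boolean arguments are invented for illustration (the real code compares L->contains(FBB) with the constant's value): when the exit branch's condition is a literal constant, the branch always resolves the same way, so either the backedge is never taken (count zero) or the loop never exits through this branch (no computable count).

#include <iostream>

// Invented helper mirroring the constant-condition case in
// ComputeBackedgeTakenCountFromExitCond. FalseSuccInLoop corresponds to
// L->contains(FBB); CondValue to the constant condition's value.
enum class ExitCount { Zero, CouldNotCompute };

ExitCount constantCondExitCount(bool CondValue, bool FalseSuccInLoop) {
  if (FalseSuccInLoop == !CondValue)
    return ExitCount::CouldNotCompute; // branch always stays in the loop
  return ExitCount::Zero;              // branch exits before any backedge
}

int main() {
  // e.g. "while (true)": the false successor (the exit) is outside the loop
  // and the condition is constant true, so this exit never fires and the
  // backedge-taken count cannot be computed from it.
  std::cout << (constantCondExitCount(true, false) == ExitCount::CouldNotCompute)
            << "\n"; // prints 1
  return 0;
}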
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); if (!GEP) return getCouldNotCompute(); @@ -4175,14 +4241,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { } } - Constant *C; + Constant *C = 0; if (const CmpInst *CI = dyn_cast<CmpInst>(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], TD); else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Operands[0], Operands.size(), TD); - return getSCEV(C); + if (C) + return getSCEV(C); } } @@ -4390,7 +4457,8 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// HowFarToZero - Return the number of times a backedge comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. -const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // If the value is a constant if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { // If the value is already zero, the branch will execute zero times. @@ -4435,7 +4503,7 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { -StartC->getValue()->getValue(), *this); } - } else if (AddRec->isQuadratic() && AddRec->getType()->isInteger()) { + } else if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. std::pair<const SCEV *,const SCEV *> Roots = SolveQuadraticEquation(AddRec, @@ -4470,7 +4538,8 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { /// HowFarToNonZero - Return the number of times a backedge checking the /// specified value for nonzero will execute. If not computable, return /// CouldNotCompute -const SCEV *ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // Loops that look like: while (X == 0) are very strange indeed. We don't // handle them yet except for the trivial case. This could be expanded in the // future as needed. @@ -4711,7 +4780,7 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, bool Inverse) { - // Recursivly handle And and Or conditions. + // Recursively handle And and Or conditions. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) { if (BO->getOpcode() == Instruction::And) { if (!Inverse) @@ -4914,7 +4983,7 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue, } /// isImpliedCondOperands - Test whether the condition described by Pred, -/// LHS, and RHS is true whenever the condition desribed by Pred, FoundLHS, +/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, /// and FoundRHS is true. bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, @@ -4929,7 +4998,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, } /// isImpliedCondOperandsHelper - Test whether the condition described by -/// Pred, LHS, and RHS is true whenever the condition desribed by Pred, +/// Pred, LHS, and RHS is true whenever the condition described by Pred, /// FoundLHS, and FoundRHS is true. 
bool ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, @@ -5087,7 +5156,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // If MaxEnd is within a step of the maximum integer value in its type, // adjust it down to the minimum value which would produce the same effect. - // This allows the subsequent ceiling divison of (N+(step-1))/step to + // This allows the subsequent ceiling division of (N+(step-1))/step to // compute the correct value. const SCEV *StepMinusOne = getMinusSCEV(Step, getIntegerSCEV(1, Step->getType())); @@ -5304,8 +5373,8 @@ ScalarEvolution::ScalarEvolution() bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis<LoopInfo>(); - DT = &getAnalysis<DominatorTree>(); TD = getAnalysisIfAvailable<TargetData>(); + DT = &getAnalysis<DominatorTree>(); return false; } @@ -5364,7 +5433,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, } void ScalarEvolution::print(raw_ostream &OS, const Module *) const { - // ScalarEvolution's implementaiton of the print method is to print + // ScalarEvolution's implementation of the print method is to print // out SCEV values of all instructions that are interesting. Doing // this potentially causes it to create new SCEV objects though, // which technically conflicts with the const qualifier. This isn't diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 498c4a8..17b254f 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -10,6 +10,10 @@ // This file defines the ScalarEvolutionAliasAnalysis pass, which implements a // simple alias analysis implemented in terms of ScalarEvolution queries. // +// This differs from traditional loop dependence analysis in that it tests +// for dependencies within a single iteration of a loop, rather than +// dependences between different iterations. +// // ScalarEvolution has a more complete understanding of pointer arithmetic // than BasicAliasAnalysis' collection of ad-hoc analyses. // @@ -41,7 +45,7 @@ namespace { return (AliasAnalysis*)this; return this; } - + private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual bool runOnFunction(Function &F); @@ -89,7 +93,7 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { // If there's a pointer operand, it'll be sorted at the end of the list. const SCEV *Last = A->getOperand(A->getNumOperands()-1); - if (isa<PointerType>(Last->getType())) + if (Last->getType()->isPointerTy()) return GetBaseValue(Last); } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { // This is a leaf node. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 4310e3c..e27da96 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/STLExtras.h" @@ -137,6 +138,10 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. 
+ if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && IP->getOperand(1) == RHS) return IP; @@ -144,15 +149,34 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, } } + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // If we haven't found this binop, insert it. Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"); rememberInstruction(BO); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return BO; } /// FactorOutConstant - Test if S is divisible by Factor, using signed /// division. If so, update S with Factor divided out and return true. -/// S need not be evenly divisble if a reasonable remainder can be +/// S need not be evenly divisible if a reasonable remainder can be /// computed. /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made /// unnecessary; in its place, just signed-divide Ops[i] by the scale and @@ -462,7 +486,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, break; } - // If none of the operands were convertable to proper GEP indices, cast + // If none of the operands were convertible to proper GEP indices, cast // the base to i8* and do an ugly getelementptr with that. It's still // better than ptrtoint+arithmetic+inttoptr at least. if (!AnyNonZeroIndices) { @@ -486,6 +510,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, if (IP != BlockBegin) { --IP; for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa<DbgInfoIntrinsic>(IP)) + ScanLimit++; if (IP->getOpcode() == Instruction::GetElementPtr && IP->getOperand(0) == V && IP->getOperand(1) == Idx) return IP; @@ -493,12 +521,56 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } } + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // Emit a GEP. Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return GEP; } + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. 
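The effect of the preheader hoisting added to InsertBinop and expandAddToGEP, shown at the source level with made-up variables rather than expander code: when both operands of a value the expander needs are invariant in the loops enclosing the insertion point, the computation is now emitted in the outermost enclosing preheader instead of inside the loop nest.

#include <cstdio>

int main() {
  int a = 3, b = 4, sum = 0;

  // Before this change, the expander would have materialized "a + b" at the
  // original insertion point, inside both loops. Because a and b are
  // invariant in both loops, the insertion point is now walked up through
  // each loop's preheader first, so the add is emitted once, out here.
  int t = a + b;
  for (int i = 0; i < 10; ++i)
    for (int j = 0; j < 10; ++j)
      sum += t * (i + j);

  std::printf("%d\n", sum);
  return 0;
}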
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V)) break; + + bool AnyIndexNotLoopInvariant = false; + for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(), + E = GepIndices.end(); I != E; ++I) + if (!L->isLoopInvariant(*I)) { + AnyIndexNotLoopInvariant = true; + break; + } + if (AnyIndexNotLoopInvariant) + break; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, // because ScalarEvolution may have changed the address arithmetic to // compute a value which is beyond the end of the allocated object. @@ -511,6 +583,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, "scevgep"); Ops.push_back(SE.getUnknown(GEP)); rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return expand(SE.getAddExpr(Ops)); } @@ -528,70 +605,179 @@ static bool isNonConstantNegative(const SCEV *F) { return SC->getValue()->getValue().isNegative(); } -Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { - int NumOperands = S->getNumOperands(); - const Type *Ty = SE.getEffectiveSCEVType(S->getType()); +/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for +/// SCEV expansion. If they are nested, this is the most nested. If they are +/// neighboring, pick the later. +static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, + DominatorTree &DT) { + if (!A) return B; + if (!B) return A; + if (A->contains(B)) return B; + if (B->contains(A)) return A; + if (DT.dominates(A->getHeader(), B->getHeader())) return B; + if (DT.dominates(B->getHeader(), A->getHeader())) return A; + return A; // Arbitrarily break the tie. +} - // Find the index of an operand to start with. Choose the operand with - // pointer type, if there is one, or the last operand otherwise. - int PIdx = 0; - for (; PIdx != NumOperands - 1; ++PIdx) - if (isa<PointerType>(S->getOperand(PIdx)->getType())) break; +/// GetRelevantLoop - Get the most relevant loop associated with the given +/// expression, according to PickMostRelevantLoop. +static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI, + DominatorTree &DT) { + if (isa<SCEVConstant>(S)) + return 0; + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) + return LI.getLoopFor(I->getParent()); + return 0; + } + if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { + const Loop *L = 0; + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + L = AR->getLoop(); + for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); + I != E; ++I) + L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT); + return L; + } + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return GetRelevantLoop(C->getOperand(), LI, DT); + if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) + return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT), + GetRelevantLoop(D->getRHS(), LI, DT), + DT); + llvm_unreachable("Unexpected SCEV type!"); +} - // Expand code for the operand that we chose. - Value *V = expand(S->getOperand(PIdx)); +/// LoopCompare - Compare loops by PickMostRelevantLoop. 
+class LoopCompare { + DominatorTree &DT; +public: + explicit LoopCompare(DominatorTree &dt) : DT(dt) {} + + bool operator()(std::pair<const Loop *, const SCEV *> LHS, + std::pair<const Loop *, const SCEV *> RHS) const { + // Compare loops with PickMostRelevantLoop. + if (LHS.first != RHS.first) + return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; + + // If one operand is a non-constant negative and the other is not, + // put the non-constant negative on the right so that a sub can + // be used instead of a negate and add. + if (isNonConstantNegative(LHS.second)) { + if (!isNonConstantNegative(RHS.second)) + return false; + } else if (isNonConstantNegative(RHS.second)) + return true; - // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the - // comments on expandAddToGEP for details. - if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) { - // Take the operand at PIdx out of the list. - const SmallVectorImpl<const SCEV *> &Ops = S->getOperands(); - SmallVector<const SCEV *, 8> NewOps; - NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx); - NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end()); - // Make a GEP. - return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V); + // Otherwise they are equivalent according to this comparison. + return false; } +}; - // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer - // arithmetic. - V = InsertNoopCastOfTo(V, Ty); +Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - // Emit a bunch of add instructions - for (int i = NumOperands-1; i >= 0; --i) { - if (i == PIdx) continue; - const SCEV *Op = S->getOperand(i); - if (isNonConstantNegative(Op)) { + // Collect all the add operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal, and + // so that pointer operands are inserted first, which the code below relies on + // to form more involved GEPs. + SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT), + *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants and + // pointer operands precede non-pointer operands. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to add all the operands. Hoist as much as possible + // out of loops, and form meaningful getelementptrs where possible. + Value *Sum = 0; + for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const Loop *CurLoop = I->first; + const SCEV *Op = I->second; + if (!Sum) { + // This is the first operand. Just expand it. + Sum = expand(Op); + ++I; + } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { + // The running sum expression is a pointer. Try to form a getelementptr + // at this level with that as the base. + SmallVector<const SCEV *, 4> NewOps; + for (; I != E && I->first == CurLoop; ++I) + NewOps.push_back(I->second); + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); + } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { + // The running sum is an integer, and there's a pointer at this level. + // Try to form a getelementptr. 
+ SmallVector<const SCEV *, 4> NewOps; + NewOps.push_back(SE.getUnknown(Sum)); + for (++I; I != E && I->first == CurLoop; ++I) + NewOps.push_back(I->second); + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); + } else if (isNonConstantNegative(Op)) { + // Instead of doing a negate and add, just do a subtract. Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); - V = InsertBinop(Instruction::Sub, V, W); + Sum = InsertNoopCastOfTo(Sum, Ty); + Sum = InsertBinop(Instruction::Sub, Sum, W); + ++I; } else { + // A simple add. Value *W = expandCodeFor(Op, Ty); - V = InsertBinop(Instruction::Add, V, W); + Sum = InsertNoopCastOfTo(Sum, Ty); + // Canonicalize a constant to the RHS. + if (isa<Constant>(Sum)) std::swap(Sum, W); + Sum = InsertBinop(Instruction::Add, Sum, W); + ++I; } } - return V; + + return Sum; } Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - int FirstOp = 0; // Set if we should emit a subtract. - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(0))) - if (SC->getValue()->isAllOnesValue()) - FirstOp = 1; - - int i = S->getNumOperands()-2; - Value *V = expandCodeFor(S->getOperand(i+1), Ty); - - // Emit a bunch of multiply instructions - for (; i >= FirstOp; --i) { - Value *W = expandCodeFor(S->getOperand(i), Ty); - V = InsertBinop(Instruction::Mul, V, W); + + // Collect all the mul operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal. + SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; + for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT), + *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to mul all the operands. Hoist as much as possible + // out of loops. + Value *Prod = 0; + for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const SCEV *Op = I->second; + if (!Prod) { + // This is the first operand. Just expand it. + Prod = expand(Op); + ++I; + } else if (Op->isAllOnesValue()) { + // Instead of doing a multiply by negative one, just do a negate. + Prod = InsertNoopCastOfTo(Prod, Ty); + Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); + ++I; + } else { + // A simple mul. + Value *W = expandCodeFor(Op, Ty); + Prod = InsertNoopCastOfTo(Prod, Ty); + // Canonicalize a constant to the RHS. + if (isa<Constant>(Prod)) std::swap(Prod, W); + Prod = InsertBinop(Instruction::Mul, Prod, W); + ++I; + } } - // -1 * ... ---> 0 - ... - if (FirstOp == 1) - V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V); - return V; + return Prod; } Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { @@ -641,8 +827,65 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Reuse a previously-inserted PHI, if present. 
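A rough standalone illustration of the operand ordering introduced in visitAddExpr and visitMulExpr; the Operand struct and the depth-based comparison are simplifications of PickMostRelevantLoop, and the operand strings are invented: operands are paired with their most relevant loop and stably sorted so loop-invariant and outer-loop parts come first, letting each partial result be formed as far out of the loop nest as possible.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-in for "most relevant loop", keyed by nesting depth
// (0 = loop-invariant). The real comparator uses PickMostRelevantLoop and
// also orders non-constant-negative operands last so subs can be formed.
struct Operand {
  int LoopDepth;
  std::string Text;
};

int main() {
  std::vector<Operand> Ops = {
      {2, "{0,+,1}<inner>"}, {0, "%base"}, {1, "{0,+,8}<outer>"}, {0, "4"}};

  // A stable sort preserves the original order within each group, so
  // operands that were collected in reverse keep constants at the end.
  std::stable_sort(Ops.begin(), Ops.end(),
                   [](const Operand &L, const Operand &R) {
                     return L.LoopDepth < R.LoopDepth;
                   });

  // Emitted order: %base, 4, {0,+,8}<outer>, {0,+,1}<inner> -- invariant
  // parts first, then outer-loop parts, then inner-loop parts.
  for (const Operand &Op : Ops)
    std::cout << Op.Text << "\n";
  return 0;
}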
for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) - if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized) - return PN; + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(Normalized->getType())) && + SE.getSCEV(PN) == Normalized) + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *IncV = + cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + + // Determine if this is a well-behaved chain of instructions leading + // back to the PHI. It probably will be, if we're scanning an inner + // loop already visited by LSR for example, but it wouldn't have + // to be. + do { + if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV)) { + IncV = 0; + break; + } + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) { + IncV = 0; + break; + } + if (!IncV) + break; + // Advance to the next instruction. + IncV = dyn_cast<Instruction>(IncV->getOperand(0)); + if (!IncV) + break; + if (IncV->mayHaveSideEffects()) { + IncV = 0; + break; + } + } while (IncV != PN); + + if (IncV) { + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + IncV = cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)); + rememberInstruction(IncV); + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. + IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast<Instruction>(IncV->getOperand(0)); + } while (IncV != PN); + return PN; + } + } // Save the original insertion point so we can restore it when we're done. BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); @@ -658,7 +901,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // negative, insert a sub instead of an add for the increment (unless it's a // constant, because subtracts of constants are canonicalized to adds). const SCEV *Step = Normalized->getStepRecurrence(SE); - bool isPointer = isa<PointerType>(ExpandTy); + bool isPointer = ExpandTy->isPointerTy(); bool isNegative = !isPointer && isNonConstantNegative(Step); if (isNegative) Step = SE.getNegativeSCEV(Step); @@ -713,7 +956,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Restore the original insert point. if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); // Remember this PHI, even in post-inc mode. InsertedValues.insert(PN); @@ -763,7 +1006,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { const Type *ExpandTy = PostLoopScale ? IntTy : STy; PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); - // Accomodate post-inc mode, if necessary. + // Accommodate post-inc mode, if necessary. Value *Result; if (L != PostIncLoop) Result = PN; @@ -776,6 +1019,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating scale. 
if (PostLoopScale) { + Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateMul(Result, expandCodeFor(PostLoopScale, IntTy)); rememberInstruction(Result); @@ -787,6 +1031,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { const SCEV *const OffsetArray[1] = { PostLoopOffset }; Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); } else { + Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateAdd(Result, expandCodeFor(PostLoopOffset, IntTy)); rememberInstruction(Result); @@ -806,7 +1051,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { PHINode *CanonicalIV = 0; if (PHINode *PN = L->getCanonicalInductionVariable()) if (SE.isSCEVable(PN->getType()) && - isa<IntegerType>(SE.getEffectiveSCEVType(PN->getType())) && + SE.getEffectiveSCEVType(PN->getType())->isIntegerTy() && SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) CanonicalIV = PN; @@ -827,7 +1072,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { while (isa<PHINode>(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1022,13 +1267,24 @@ Value *SCEVExpander::expand(const SCEV *S) { L = L->getParentLoop()) if (S->isLoopInvariant(L)) { if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) + if (BasicBlock *Preheader = L->getLoopPreheader()) { InsertPt = Preheader->getTerminator(); + BasicBlock::iterator IP = InsertPt; + // Back past any debug info instructions. Sometimes we inserted + // something earlier before debug info but after any real instructions. + // This should behave the same as if debug info was not present. + while (IP != Preheader->begin()) { + --IP; + if (!isa<DbgInfoIntrinsic>(IP)) + break; + InsertPt = IP; + } + } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && S->hasComputableLoopEvolution(L)) + if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); @@ -1053,10 +1309,32 @@ Value *SCEVExpander::expand(const SCEV *S) { if (!PostIncLoop) InsertedExpressions[std::make_pair(S, InsertPt)] = V; - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } +void SCEVExpander::rememberInstruction(Value *I) { + if (!PostIncLoop) + InsertedValues.insert(I); + + // If we just claimed an existing instruction and that instruction had + // been the insert point, adjust the insert point forward so that + // subsequently inserted code will be dominated. + if (Builder.GetInsertPoint() == I) { + BasicBlock::iterator It = cast<Instruction>(I); + do { ++It; } while (isInsertedInstruction(It)); + Builder.SetInsertPoint(Builder.GetInsertBlock(), It); + } +} + +void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { + // If we acquired more instructions since the old insert point was saved, + // advance past them. 
+ while (isInsertedInstruction(I)) ++I; + + Builder.SetInsertPoint(BB, I); +} + /// getOrInsertCanonicalInductionVariable - This method returns the /// canonical induction variable of the specified type for the specified /// loop (inserting one if there is none). A canonical induction variable @@ -1064,13 +1342,13 @@ Value *SCEVExpander::expand(const SCEV *S) { Value * SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty) { - assert(Ty->isInteger() && "Can only insert integer induction variables!"); + assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); const SCEV *H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), SE.getIntegerSCEV(1, Ty), L); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); Value *V = expandCodeFor(H, 0, L->getHeader()->begin()); if (SaveInsertBB) - Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index f9331e7..92cbb7c 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -23,6 +23,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include <cstring> using namespace llvm; @@ -49,11 +50,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Mask.getBitWidth(); - assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) && - "Not integer or pointer type!"); + assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy()) + && "Not integer or pointer type!"); assert((!TD || TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && - (!V->getType()->isIntOrIntVector() || + (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && @@ -249,7 +250,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. 
- if (isa<PointerType>(SrcTy)) + if (SrcTy->isPointerTy()) SrcBitWidth = TD->getTypeSizeInBits(SrcTy); else SrcBitWidth = SrcTy->getScalarSizeInBits(); @@ -269,10 +270,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } case Instruction::BitCast: { const Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isInteger() || isa<PointerType>(SrcTy)) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) - !isa<VectorType>(I->getType())) { + !I->getType()->isVectorTy()) { ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, Depth+1); return; @@ -649,7 +650,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, /// unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD, unsigned Depth) { - assert((TD || V->getType()->isIntOrIntVector()) && + assert((TD || V->getType()->isIntOrIntVectorTy()) && "ComputeNumSignBits requires a TargetData object to operate " "on non-integer values!"); const Type *Ty = V->getType(); @@ -823,7 +824,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); - assert(V->getType()->isInteger() && "Not integer or pointer type!"); + assert(V->getType()->isIntegerTy() && "Not integer or pointer type!"); const Type *T = V->getType(); @@ -980,7 +981,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { /// may not be represented in the result. static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, const TargetData *TD, unsigned Depth) { - assert(isa<IntegerType>(V->getType()) && "Not an integer value"); + assert(V->getType()->isIntegerTy() && "Not an integer value"); // Limit our recursion depth. if (Depth == 6) { @@ -1253,7 +1254,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, if (idx_begin == idx_end) return V; // We have indices, so V should have an indexable type - assert((isa<StructType>(V->getType()) || isa<ArrayType>(V->getType())) + assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) && "Not looking at a struct or array?"); assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end) && "Invalid indices for type?"); @@ -1372,7 +1373,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Make sure the index-ee is a pointer to array of i8. const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType()); const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType()); - if (AT == 0 || !AT->getElementType()->isInteger(8)) + if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) return false; // Check to make sure that the first operand of the GEP is an integer and @@ -1411,7 +1412,7 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // Must be a Constant Array ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (Array == 0 || !Array->getType()->getElementType()->isInteger(8)) + if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8)) return false; // Get the number of elements in the array @@ -1436,3 +1437,131 @@ bool llvm::GetConstantStringInfo(Value *V, std::string &Str, uint64_t Offset, // The array isn't null terminated, but maybe this is a memcpy, not a strcpy. return true; } + +// These next two are very similar to the above, but also look through PHI +// nodes. +// TODO: See if we can integrate these two together. 
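// Illustrative sketch (not from the LLVM sources): a hypothetical caller of
// the llvm::GetStringLength entry point defined just below, for example from
// a libcall simplification.  Per the comments below, the helper returns the
// string length plus one (it counts the terminating NUL) and 0 when the
// length cannot be determined.  The header name is an assumption; this patch
// only shows the definition in ValueTracking.cpp.
#include <stdint.h>
#include "llvm/Analysis/ValueTracking.h"   // assumed location of the declaration
using namespace llvm;

// Returns the strlen()-style length of the string Ptr points to, or ~0ULL
// when it cannot be determined.
static uint64_t knownStrlenOrSentinel(Value *Ptr) {
  uint64_t LenPlusNul = GetStringLength(Ptr);   // 0 means "unknown"
  return LenPlusNul ? LenPlusNul - 1 : ~0ULL;
}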
+ +/// GetStringLengthH - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { + // Look through noop bitcast instructions. + if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + return GetStringLengthH(BCI->getOperand(0), PHIs); + + // If this is a PHI node, there are two cases: either we have already seen it + // or we haven't. + if (PHINode *PN = dyn_cast<PHINode>(V)) { + if (!PHIs.insert(PN)) + return ~0ULL; // already in the set. + + // If it was new, see if all the input strings are the same length. + uint64_t LenSoFar = ~0ULL; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); + if (Len == 0) return 0; // Unknown length -> unknown. + + if (Len == ~0ULL) continue; + + if (Len != LenSoFar && LenSoFar != ~0ULL) + return 0; // Disagree -> unknown. + LenSoFar = Len; + } + + // Success, all agree. + return LenSoFar; + } + + // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) + if (SelectInst *SI = dyn_cast<SelectInst>(V)) { + uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); + if (Len1 == 0) return 0; + uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); + if (Len2 == 0) return 0; + if (Len1 == ~0ULL) return Len2; + if (Len2 == ~0ULL) return Len1; + if (Len1 != Len2) return 0; + return Len1; + } + + // If the value is not a GEP instruction nor a constant expression with a + // GEP instruction, then return unknown. + User *GEP = 0; + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { + GEP = GEPI; + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + if (CE->getOpcode() != Instruction::GetElementPtr) + return 0; + GEP = CE; + } else { + return 0; + } + + // Make sure the GEP has exactly three arguments. + if (GEP->getNumOperands() != 3) + return 0; + + // Check to make sure that the first operand of the GEP is an integer and + // has value 0 so that we are sure we're indexing into the initializer. + if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { + if (!Idx->isZero()) + return 0; + } else + return 0; + + // If the second index isn't a ConstantInt, then this is a variable index + // into the array. If this occurs, we can't say anything meaningful about + // the string. + uint64_t StartIdx = 0; + if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) + StartIdx = CI->getZExtValue(); + else + return 0; + + // The GEP instruction, constant or instruction, must reference a global + // variable that is a constant and is initialized. The referenced constant + // initializer is the array that we'll use for optimization. + GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); + if (!GV || !GV->isConstant() || !GV->hasInitializer() || + GV->mayBeOverridden()) + return 0; + Constant *GlobalInit = GV->getInitializer(); + + // Handle the ConstantAggregateZero case, which is a degenerate case. The + // initializer is constant zero so the length of the string must be zero. + if (isa<ConstantAggregateZero>(GlobalInit)) + return 1; // Len = 0 offset by 1. 
+ + // Must be a Constant Array + ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); + if (!Array || !Array->getType()->getElementType()->isIntegerTy(8)) + return false; + + // Get the number of elements in the array + uint64_t NumElts = Array->getType()->getNumElements(); + + // Traverse the constant array from StartIdx (derived above) which is + // the place the GEP refers to in the array. + for (unsigned i = StartIdx; i != NumElts; ++i) { + Constant *Elt = Array->getOperand(i); + ConstantInt *CI = dyn_cast<ConstantInt>(Elt); + if (!CI) // This array isn't suitable, non-int initializer. + return 0; + if (CI->isZero()) + return i-StartIdx+1; // We found end of string, success! + } + + return 0; // The array isn't null terminated, conservatively return 'unknown'. +} + +/// GetStringLength - If we can compute the length of the string pointed to by +/// the specified pointer, return 'len+1'. If we can't, return 0. +uint64_t llvm::GetStringLength(Value *V) { + if (!V->getType()->isPointerTy()) return 0; + + SmallPtrSet<PHINode*, 32> PHIs; + uint64_t Len = GetStringLengthH(V, PHIs); + // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return + // an empty string as a length. + return Len == ~0ULL ? 1 : Len; +} diff --git a/lib/AsmParser/Android.mk b/lib/AsmParser/Android.mk new file mode 100644 index 0000000..548f719 --- /dev/null +++ b/lib/AsmParser/Android.mk @@ -0,0 +1,28 @@ +LOCAL_PATH:= $(call my-dir) + +asm_parser_SRC_FILES := \ + LLLexer.cpp \ + LLParser.cpp \ + Parser.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(asm_parser_SRC_FILES) + +LOCAL_MODULE:= libLLVMAsmParser + +include $(LOCAL_PATH)/../../llvm-host-build.mk +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(asm_parser_SRC_FILES) + +LOCAL_MODULE:= libLLVMAsmParser + +include $(LOCAL_PATH)/../../llvm-device-build.mk +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 8ad658d..46f3cbc 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -570,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); + KEYWORD(union); KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 5dd6569..8083a07 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -614,7 +614,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Aliasee = ID.ConstantVal; } - if (!isa<PointerType>(Aliasee->getType())) + if (!Aliasee->getType()->isPointerTy()) return Error(AliaseeLoc, "alias must have pointer type"); // Okay, create the alias but do not insert it into the module yet. @@ -685,7 +685,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, return true; } - if (isa<FunctionType>(Ty) || Ty->isLabelTy()) + if (Ty->isFunctionTy() || Ty->isLabelTy()) return Error(TyLoc, "invalid type for global variable"); GlobalVariable *GV = 0; @@ -791,7 +791,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty, GlobalValue *FwdVal; if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) { // Function types can return opaque but functions can't. 
- if (isa<OpaqueType>(FT->getReturnType())) { + if (FT->getReturnType()->isOpaqueTy()) { Error(Loc, "function may not return opaque type"); return 0; } @@ -836,7 +836,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) { GlobalValue *FwdVal; if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) { // Function types can return opaque but functions can't. - if (isa<OpaqueType>(FT->getReturnType())) { + if (FT->getReturnType()->isOpaqueTy()) { Error(Loc, "function may not return opaque type"); return 0; } @@ -956,6 +956,14 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; case lltok::kw_naked: Attrs |= Attribute::Naked; break; + case lltok::kw_alignstack: { + unsigned Alignment; + if (ParseOptionalStackAlignment(Alignment)) + return true; + Attrs |= Attribute::constructStackAlignmentFromInt(Alignment); + continue; + } + case lltok::kw_align: { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) @@ -963,6 +971,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { Attrs |= Attribute::constructAlignmentFromInt(Alignment); continue; } + } Lex.Lex(); } @@ -1131,6 +1140,25 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, return false; } +/// ParseOptionalStackAlignment +/// ::= /* empty */ +/// ::= 'alignstack' '(' 4 ')' +bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) { + Alignment = 0; + if (!EatIfPresent(lltok::kw_alignstack)) + return false; + LocTy ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(ParenLoc, "expected '('"); + LocTy AlignLoc = Lex.getLoc(); + if (ParseUInt32(Alignment)) return true; + ParenLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return Error(ParenLoc, "expected ')'"); + if (!isPowerOf2_32(Alignment)) + return Error(AlignLoc, "stack alignment is not a power of two"); + return false; +} /// ParseIndexList - This parses the index list for an insert/extractvalue /// instruction. This sets AteExtraComma in the case where we eat an extra @@ -1267,6 +1295,11 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) { if (ParseStructType(Result, false)) return true; break; + case lltok::kw_union: + // TypeRec ::= 'union' '{' ... '}' + if (ParseUnionType(Result)) + return true; + break; case lltok::lsquare: // TypeRec ::= '[' ... ']' Lex.Lex(); // eat the lsquare. 
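The alignstack attribute parsing introduced above follows the grammar noted in the patch, 'alignstack' '(' N ')', and rejects any N that is not a power of two via isPowerOf2_32. A standalone model of that validation in plain C++, with the power-of-two predicate written out the way LLVM's MathExtras defines it:

    #include <stdint.h>

    // Same test isPowerOf2_32 performs: non-zero, and clearing the lowest
    // set bit leaves zero.
    static bool isPowerOf2_32(uint32_t Value) {
      return Value && (Value & (Value - 1)) == 0;
    }

    // Mirrors the check at the end of ParseOptionalStackAlignment:
    // alignstack(16) parses, alignstack(12) reports "stack alignment is not
    // a power of two".
    static bool isAcceptableStackAlignment(uint32_t N) {
      return isPowerOf2_32(N);
    }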
@@ -1482,7 +1515,7 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList, Name = ""; } - if (!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy)) + if (!ArgTy->isFirstClassType() && !ArgTy->isOpaqueTy()) return Error(TypeLoc, "invalid type for function argument"); ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name)); @@ -1576,6 +1609,38 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) { return false; } +/// ParseUnionType +/// TypeRec +/// ::= 'union' '{' TypeRec (',' TypeRec)* '}' +bool LLParser::ParseUnionType(PATypeHolder &Result) { + assert(Lex.getKind() == lltok::kw_union); + Lex.Lex(); // Consume the 'union' + + if (ParseToken(lltok::lbrace, "'{' expected after 'union'")) return true; + + SmallVector<PATypeHolder, 8> ParamsList; + do { + LocTy EltTyLoc = Lex.getLoc(); + if (ParseTypeRec(Result)) return true; + ParamsList.push_back(Result); + + if (Result->isVoidTy()) + return Error(EltTyLoc, "union element can not have void type"); + if (!UnionType::isValidElementType(Result)) + return Error(EltTyLoc, "invalid element type for union"); + + } while (EatIfPresent(lltok::comma)) ; + + if (ParseToken(lltok::rbrace, "expected '}' at end of union")) + return true; + + SmallVector<const Type*, 8> ParamsListTy; + for (unsigned i = 0, e = ParamsList.size(); i != e; ++i) + ParamsListTy.push_back(ParamsList[i].get()); + Result = HandleUpRefs(UnionType::get(&ParamsListTy[0], ParamsListTy.size())); + return false; +} + /// ParseArrayVectorType - Parse an array or vector type, assuming the first /// token has already been consumed. /// TypeRec @@ -1720,7 +1785,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, } // Don't make placeholders with invalid type. - if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) { + if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } @@ -1761,7 +1826,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty, return 0; } - if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) { + if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); return 0; } @@ -1992,8 +2057,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Elts.empty()) return Error(ID.Loc, "constant vector must not be empty"); - if (!Elts[0]->getType()->isInteger() && - !Elts[0]->getType()->isFloatingPoint()) + if (!Elts[0]->getType()->isIntegerTy() && + !Elts[0]->getType()->isFloatingPointTy()) return Error(FirstEltLoc, "vector elements must have integer or floating point type"); @@ -2135,8 +2200,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr")) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) - return Error(ID.Loc, "extractvalue operand must be array or struct"); + if (!Val->getType()->isAggregateType()) + return Error(ID.Loc, "extractvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for extractvalue"); @@ -2156,8 +2221,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseIndexList(Indices) || ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr")) return true; - if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType())) - return 
Error(ID.Loc, "extractvalue operand must be array or struct"); + if (!Val0->getType()->isAggregateType()) + return Error(ID.Loc, "insertvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) return Error(ID.Loc, "invalid indices for insertvalue"); @@ -2185,13 +2250,13 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal; if (Opc == Instruction::FCmp) { - if (!Val0->getType()->isFPOrFPVector()) + if (!Val0->getType()->isFPOrFPVectorTy()) return Error(ID.Loc, "fcmp requires floating point operands"); ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1); } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); - if (!Val0->getType()->isIntOrIntVector() && - !isa<PointerType>(Val0->getType())) + if (!Val0->getType()->isIntOrIntVectorTy() && + !Val0->getType()->isPointerTy()) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1); } @@ -2241,7 +2306,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Val0->getType() != Val1->getType()) return Error(ID.Loc, "operands of constexpr must have same type"); - if (!Val0->getType()->isIntOrIntVector()) { + if (!Val0->getType()->isIntOrIntVectorTy()) { if (NUW) return Error(ModifierLoc, "nuw only applies to integer operations"); if (NSW) @@ -2249,8 +2314,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } // API compatibility: Accept either integer or floating-point types with // add, sub, and mul. - if (!Val0->getType()->isIntOrIntVector() && - !Val0->getType()->isFPOrFPVector()) + if (!Val0->getType()->isIntOrIntVectorTy() && + !Val0->getType()->isFPOrFPVectorTy()) return Error(ID.Loc,"constexpr requires integer, fp, or vector operands"); unsigned Flags = 0; if (NUW) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; @@ -2280,7 +2345,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Val0->getType() != Val1->getType()) return Error(ID.Loc, "operands of constexpr must have same type"); - if (!Val0->getType()->isIntOrIntVector()) + if (!Val0->getType()->isIntOrIntVectorTy()) return Error(ID.Loc, "constexpr requires integer or integer vector operands"); ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1); @@ -2305,7 +2370,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (Opc == Instruction::GetElementPtr) { - if (Elts.size() == 0 || !isa<PointerType>(Elts[0]->getType())) + if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy()) return Error(ID.Loc, "getelementptr requires pointer operand"); if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(), @@ -2405,7 +2470,7 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) { bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, PerFunctionState *PFS) { - if (isa<FunctionType>(Ty)) + if (Ty->isFunctionTy()) return Error(ID.Loc, "functions are not values, refer to them as pointers"); switch (ID.Kind) { @@ -2444,13 +2509,13 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc); return V == 0; case ValID::t_APSInt: - if (!isa<IntegerType>(Ty)) + if (!Ty->isIntegerTy()) return Error(ID.Loc, "integer constant must have integer type"); ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits()); V = ConstantInt::get(Context, ID.APSIntVal); return false; case 
ValID::t_APFloat: - if (!Ty->isFloatingPoint() || + if (!Ty->isFloatingPointTy() || !ConstantFP::isValueValidForType(Ty, ID.APFloatVal)) return Error(ID.Loc, "floating point constant invalid for type"); @@ -2470,19 +2535,19 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, return false; case ValID::t_Null: - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) return Error(ID.Loc, "null must be a pointer type"); V = ConstantPointerNull::get(cast<PointerType>(Ty)); return false; case ValID::t_Undef: // FIXME: LabelTy should not be a first-class type. if ((!Ty->isFirstClassType() || Ty->isLabelTy()) && - !isa<OpaqueType>(Ty)) + !Ty->isOpaqueTy()) return Error(ID.Loc, "invalid type for undef constant"); V = UndefValue::get(Ty); return false; case ValID::t_EmptyArray: - if (!isa<ArrayType>(Ty) || cast<ArrayType>(Ty)->getNumElements() != 0) + if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0) return Error(ID.Loc, "invalid empty array initializer"); V = UndefValue::get(Ty); return false; @@ -2493,8 +2558,17 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, V = Constant::getNullValue(Ty); return false; case ValID::t_Constant: - if (ID.ConstantVal->getType() != Ty) + if (ID.ConstantVal->getType() != Ty) { + // Allow a constant struct with a single member to be converted + // to a union, if the union has a member which is the same type + // as the struct member. + if (const UnionType* utype = dyn_cast<UnionType>(Ty)) { + return ParseUnionValue(utype, ID, V); + } + return Error(ID.Loc, "constant expression type mismatch"); + } + V = ID.ConstantVal; return false; } @@ -2524,6 +2598,22 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, return false; } +bool LLParser::ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V) { + if (const StructType* stype = dyn_cast<StructType>(ID.ConstantVal->getType())) { + if (stype->getNumContainedTypes() != 1) + return Error(ID.Loc, "constant expression type mismatch"); + int index = utype->getElementTypeIndex(stype->getContainedType(0)); + if (index < 0) + return Error(ID.Loc, "initializer type is not a member of the union"); + + V = ConstantUnion::get( + utype, cast<Constant>(ID.ConstantVal->getOperand(0))); + return false; + } + + return Error(ID.Loc, "constant expression type mismatch"); +} + /// FunctionHeader /// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs @@ -2572,7 +2662,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { } if (!FunctionType::isValidReturnType(RetType) || - isa<OpaqueType>(RetType)) + RetType->isOpaqueTy()) return Error(RetTypeLoc, "invalid function return type"); LocTy NameLoc = Lex.getLoc(); @@ -2873,7 +2963,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, // API compatibility: Accept either integer or floating-point types. bool Result = ParseArithmetic(Inst, PFS, KeywordVal, 0); if (!Result) { - if (!Inst->getType()->isIntOrIntVector()) { + if (!Inst->getType()->isIntOrIntVectorTy()) { if (NUW) return Error(ModifierLoc, "nuw only applies to integer operations"); if (NSW) @@ -3096,7 +3186,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) { ParseToken(lltok::lsquare, "expected '[' with switch table")) return true; - if (!isa<IntegerType>(Cond->getType())) + if (!Cond->getType()->isIntegerTy()) return Error(CondLoc, "switch condition must have integer type"); // Parse the jump table pairs. 
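The union support threaded through the parser here, and through the bitcode reader and writer further down, relies on the new UnionType and ConstantUnion classes. A hypothetical construction sequence based only on the calls visible in this patch (UnionType::get, UnionType::isValidElementType, getElementTypeIndex, ConstantUnion::get); the header locations and exact signatures are assumptions:

    #include <cassert>
    #include "llvm/LLVMContext.h"
    #include "llvm/DerivedTypes.h"   // assumed location of UnionType
    #include "llvm/Constants.h"      // assumed location of ConstantUnion
    using namespace llvm;

    static Constant *makeUnionConstantExample(LLVMContext &Ctx) {
      const Type *Elts[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx) };
      assert(UnionType::isValidElementType(Elts[0]) && "i32 should be a legal member");

      const UnionType *UTy = UnionType::get(&Elts[0], 2);

      // getElementTypeIndex returns the member index, or a negative value
      // when the type is not a member; the parser uses this to validate
      // union initializers.
      int Idx = UTy->getElementTypeIndex(Type::getInt32Ty(Ctx));
      (void)Idx;

      // A union constant wraps a constant of one member type.
      return ConstantUnion::get(UTy, ConstantInt::get(Type::getInt32Ty(Ctx), 42));
    }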
@@ -3139,7 +3229,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) { ParseToken(lltok::lsquare, "expected '[' with indirectbr")) return true; - if (!isa<PointerType>(Address->getType())) + if (!Address->getType()->isPointerTy()) return Error(AddrLoc, "indirectbr address must have pointer type"); // Parse the destination list. @@ -3292,11 +3382,11 @@ bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS, switch (OperandType) { default: llvm_unreachable("Unknown operand type!"); case 0: // int or FP. - Valid = LHS->getType()->isIntOrIntVector() || - LHS->getType()->isFPOrFPVector(); + Valid = LHS->getType()->isIntOrIntVectorTy() || + LHS->getType()->isFPOrFPVectorTy(); break; - case 1: Valid = LHS->getType()->isIntOrIntVector(); break; - case 2: Valid = LHS->getType()->isFPOrFPVector(); break; + case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break; + case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break; } if (!Valid) @@ -3316,7 +3406,7 @@ bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS, ParseValue(LHS->getType(), RHS, PFS)) return true; - if (!LHS->getType()->isIntOrIntVector()) + if (!LHS->getType()->isIntOrIntVectorTy()) return Error(Loc,"instruction requires integer or integer vector operands"); Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); @@ -3340,13 +3430,13 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, return true; if (Opc == Instruction::FCmp) { - if (!LHS->getType()->isFPOrFPVector()) + if (!LHS->getType()->isFPOrFPVectorTy()) return Error(Loc, "fcmp requires floating point operands"); Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS); } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); - if (!LHS->getType()->isIntOrIntVector() && - !isa<PointerType>(LHS->getType())) + if (!LHS->getType()->isIntOrIntVectorTy() && + !LHS->getType()->isPointerTy()) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); } @@ -3643,7 +3733,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, } } - if (Size && !Size->getType()->isInteger(32)) + if (Size && !Size->getType()->isIntegerTy(32)) return Error(SizeLoc, "element count must be i32"); if (isAlloca) { @@ -3671,7 +3761,7 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS, BasicBlock* BB) { Value *Val; LocTy Loc; if (ParseTypeAndValue(Val, Loc, PFS)) return true; - if (!isa<PointerType>(Val->getType())) + if (!Val->getType()->isPointerTy()) return Error(Loc, "operand to free must be a pointer"); Inst = CallInst::CreateFree(Val, BB); return false; @@ -3688,7 +3778,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS, ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; - if (!isa<PointerType>(Val->getType()) || + if (!Val->getType()->isPointerTy() || !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType()) return Error(Loc, "load operand must be a pointer to a first class type"); @@ -3709,7 +3799,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS, ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; - if (!isa<PointerType>(Ptr->getType())) + if (!Ptr->getType()->isPointerTy()) return Error(PtrLoc, "store operand must be a pointer"); if (!Val->getType()->isFirstClassType()) return Error(Loc, "store operand must be a first class value"); @@ -3731,7 +3821,7 @@ bool LLParser::ParseGetResult(Instruction *&Inst, 
PerFunctionState &PFS) { ParseUInt32(Element, EltLoc)) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) + if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy()) return Error(ValLoc, "getresult inst requires an aggregate operand"); if (!ExtractValueInst::getIndexedType(Val->getType(), Element)) return Error(EltLoc, "invalid getresult index for value"); @@ -3748,7 +3838,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Ptr, Loc, PFS)) return true; - if (!isa<PointerType>(Ptr->getType())) + if (!Ptr->getType()->isPointerTy()) return Error(Loc, "base of getelementptr must be a pointer"); SmallVector<Value*, 16> Indices; @@ -3759,7 +3849,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { break; } if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; - if (!isa<IntegerType>(Val->getType())) + if (!Val->getType()->isIntegerTy()) return Error(EltLoc, "getelementptr index must be an integer"); Indices.push_back(Val); } @@ -3783,8 +3873,8 @@ int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) { ParseIndexList(Indices, AteExtraComma)) return true; - if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType())) - return Error(Loc, "extractvalue operand must be array or struct"); + if (!Val->getType()->isAggregateType()) + return Error(Loc, "extractvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), Indices.end())) @@ -3805,8 +3895,8 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { ParseIndexList(Indices, AteExtraComma)) return true; - if (!isa<StructType>(Val0->getType()) && !isa<ArrayType>(Val0->getType())) - return Error(Loc0, "extractvalue operand must be array or struct"); + if (!Val0->getType()->isAggregateType()) + return Error(Loc0, "insertvalue operand must be aggregate type"); if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), Indices.end())) diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 85c07ff..9abe404 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -31,6 +31,7 @@ namespace llvm { class GlobalValue; class MDString; class MDNode; + class UnionType; /// ValID - Represents a reference of a definition of some sort with no type. 
/// There are several cases where we have to parse the value but where the @@ -169,6 +170,7 @@ namespace llvm { bool ParseOptionalVisibility(unsigned &Visibility); bool ParseOptionalCallingConv(CallingConv::ID &CC); bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned, MDNode *> > &); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); @@ -211,6 +213,7 @@ namespace llvm { } bool ParseTypeRec(PATypeHolder &H); bool ParseStructType(PATypeHolder &H, bool Packed); + bool ParseUnionType(PATypeHolder &H); bool ParseArrayVectorType(PATypeHolder &H, bool isVector); bool ParseFunctionType(PATypeHolder &Result); PATypeHolder HandleUpRefs(const Type *Ty); @@ -279,6 +282,8 @@ namespace llvm { return ParseTypeAndBasicBlock(BB, Loc, PFS); } + bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V); + struct ParamInfo { LocTy Loc; Value *V; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 7f1807c..3ac9169 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -97,6 +97,7 @@ namespace lltok { kw_type, kw_opaque, + kw_union, kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, diff --git a/lib/Bitcode/Reader/Android.mk b/lib/Bitcode/Reader/Android.mk new file mode 100644 index 0000000..165b0d0 --- /dev/null +++ b/lib/Bitcode/Reader/Android.mk @@ -0,0 +1,29 @@ +LOCAL_PATH:= $(call my-dir) + +bitcode_reader_SRC_FILES := \ + BitReader.cpp \ + BitcodeReader.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(bitcode_reader_SRC_FILES) + +LOCAL_MODULE:= libLLVMBitReader + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(bitcode_reader_SRC_FILES) + +LOCAL_MODULE:= libLLVMBitReader + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 1facbc3..15844c0 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -21,17 +21,8 @@ using namespace llvm; Optionally returns a human-readable error message via OutMessage. */ LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage) { - std::string Message; - - *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), getGlobalContext(), - &Message)); - if (!*OutModule) { - if (OutMessage) - *OutMessage = strdup(Message.c_str()); - return 1; - } - - return 0; + return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule, + OutMessage); } LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, @@ -54,36 +45,44 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, /* Reads a module from the specified path, returning via the OutModule parameter a module provider which performs lazy deserialization. Returns 0 on success. Optionally returns a human-readable error message via OutMessage. 
*/ -LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage) { +LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM, + char **OutMessage) { std::string Message; - - *OutMP = reinterpret_cast<LLVMModuleProviderRef>( - getLazyBitcodeModule(unwrap(MemBuf), getGlobalContext(), &Message)); - - if (!*OutMP) { + + *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), + &Message)); + if (!*OutM) { if (OutMessage) *OutMessage = strdup(Message.c_str()); - return 1; + return 1; } - + return 0; + } +LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, + char **OutMessage) { + return LLVMGetBitcodeModuleInContext(LLVMGetGlobalContext(), MemBuf, OutM, + OutMessage); +} + +/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */ LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleProviderRef *OutMP, char **OutMessage) { - std::string Message; - - *OutMP = reinterpret_cast<LLVMModuleProviderRef>( - getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), &Message)); - if (!*OutMP) { - if (OutMessage) - *OutMessage = strdup(Message.c_str()); - return 1; - } - - return 0; + return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf, + reinterpret_cast<LLVMModuleRef*>(OutMP), + OutMessage); +} + +/* Deprecated: Use LLVMGetBitcodeModule instead. */ +LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, + LLVMModuleProviderRef *OutMP, + char **OutMessage) { + return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf, + OutMP, OutMessage); } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4dfc6ce..a328837 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -108,17 +108,17 @@ static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) { switch (Val) { default: return -1; case bitc::BINOP_ADD: - return Ty->isFPOrFPVector() ? Instruction::FAdd : Instruction::Add; + return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add; case bitc::BINOP_SUB: - return Ty->isFPOrFPVector() ? Instruction::FSub : Instruction::Sub; + return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub; case bitc::BINOP_MUL: - return Ty->isFPOrFPVector() ? Instruction::FMul : Instruction::Mul; + return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul; case bitc::BINOP_UDIV: return Instruction::UDiv; case bitc::BINOP_SDIV: - return Ty->isFPOrFPVector() ? Instruction::FDiv : Instruction::SDiv; + return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv; case bitc::BINOP_UREM: return Instruction::URem; case bitc::BINOP_SREM: - return Ty->isFPOrFPVector() ? Instruction::FRem : Instruction::SRem; + return Ty->isFPOrFPVectorTy() ? 
Instruction::FRem : Instruction::SRem; case bitc::BINOP_SHL: return Instruction::Shl; case bitc::BINOP_LSHR: return Instruction::LShr; case bitc::BINOP_ASHR: return Instruction::AShr; @@ -585,6 +585,13 @@ bool BitcodeReader::ParseTypeTable() { ResultTy = StructType::get(Context, EltTys, Record[0]); break; } + case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N] + SmallVector<const Type*, 8> EltTys; + for (unsigned i = 0, e = Record.size(); i != e; ++i) + EltTys.push_back(getTypeByID(Record[i], true)); + ResultTy = UnionType::get(&EltTys[0], EltTys.size()); + break; + } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return Error("Invalid ARRAY type record"); @@ -956,12 +963,12 @@ bool BitcodeReader::ParseConstants() { V = Constant::getNullValue(CurTy); break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] - if (!isa<IntegerType>(CurTy) || Record.empty()) + if (!CurTy->isIntegerTy() || Record.empty()) return Error("Invalid CST_INTEGER record"); V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] - if (!isa<IntegerType>(CurTy) || Record.empty()) + if (!CurTy->isIntegerTy() || Record.empty()) return Error("Invalid WIDE_INTEGER record"); unsigned NumWords = Record.size(); @@ -1168,7 +1175,7 @@ bool BitcodeReader::ParseConstants() { Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); - if (OpTy->isFPOrFPVector()) + if (OpTy->isFPOrFPVectorTy()) V = ConstantExpr::getFCmp(Record[3], Op0, Op1); else V = ConstantExpr::getICmp(Record[3], Op0, Op1); @@ -1400,7 +1407,7 @@ bool BitcodeReader::ParseModule() { if (Record.size() < 6) return Error("Invalid MODULE_CODE_GLOBALVAR record"); const Type *Ty = getTypeByID(Record[0]); - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) return Error("Global not a pointer type!"); unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace(); Ty = cast<PointerType>(Ty)->getElementType(); @@ -1443,7 +1450,7 @@ bool BitcodeReader::ParseModule() { if (Record.size() < 8) return Error("Invalid MODULE_CODE_FUNCTION record"); const Type *Ty = getTypeByID(Record[0]); - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) return Error("Function not a pointer type!"); const FunctionType *FTy = dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType()); @@ -1484,7 +1491,7 @@ bool BitcodeReader::ParseModule() { if (Record.size() < 3) return Error("Invalid MODULE_ALIAS record"); const Type *Ty = getTypeByID(Record[0]); - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) return Error("Function not a pointer type!"); GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]), @@ -1615,6 +1622,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) return Error("Malformed block record"); + InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); // Add all the function arguments to the value table. 
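Alongside the union support, a large share of this commit, in BitcodeReader.cpp as in the earlier files, is a mechanical move from isa<>-based type tests to the is*Ty() predicates on Type. A small sketch using only the new spellings, with each old spelling noted in a comment; the header paths are the era-appropriate ones and are an assumption here:

    #include "llvm/Value.h"
    #include "llvm/Type.h"
    using namespace llvm;

    static bool hasInterestingType(const Value *V) {
      const Type *Ty = V->getType();
      return Ty->isPointerTy()            // was: isa<PointerType>(Ty)
          || Ty->isIntegerTy(8)           // was: Ty->isInteger(8)
          || Ty->isIntOrIntVectorTy()     // was: Ty->isIntOrIntVector()
          || Ty->isFPOrFPVectorTy();      // was: Ty->isFPOrFPVector()
    }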
@@ -1885,7 +1893,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { OpNum+1 != Record.size()) return Error("Invalid CMP record"); - if (LHS->getType()->isFPOrFPVector()) + if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); else I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); @@ -1925,7 +1933,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { const Type *ReturnType = F->getReturnType(); if (Vs.size() > 1 || - (isa<StructType>(ReturnType) && + (ReturnType->isStructTy() && (Vs.empty() || Vs[0]->getType() != ReturnType))) { Value *RV = UndefValue::get(ReturnType); for (unsigned i = 0, e = Vs.size(); i != e; ++i) { diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 7ed651b..4288422 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -27,20 +27,14 @@ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { return 0; } -#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR >= 4) -#include <ext/stdio_filebuf.h> - -int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { - raw_fd_ostream OS(FileHandle, false); +int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose, + int Unbuffered) { + raw_fd_ostream OS(FD, ShouldClose, Unbuffered); WriteBitcodeToFile(unwrap(M), OS); return 0; } -#else - int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) { - return -1; // Not supported. + return LLVMWriteBitcodeToFD(M, FileHandle, true, false); } - -#endif diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index a5bb526..82e73b5 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -181,6 +181,14 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Log2_32_Ceil(VE.getTypes().size()+1))); unsigned StructAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_UNION. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, + Log2_32_Ceil(VE.getTypes().size()+1))); + unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); @@ -250,6 +258,17 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { AbbrevToUse = StructAbbrev; break; } + case Type::UnionTyID: { + const UnionType *UT = cast<UnionType>(T); + // UNION: [eltty x N] + Code = bitc::TYPE_CODE_UNION; + // Output all of the element types. 
+ for (UnionType::element_iterator I = UT->element_begin(), + E = UT->element_end(); I != E; ++I) + TypeVals.push_back(VE.getTypeID(*I)); + AbbrevToUse = UnionAbbrev; + break; + } case Type::ArrayTyID: { const ArrayType *AT = cast<ArrayType>(T); // ARRAY: [numelts, eltty] @@ -789,7 +808,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) || - isa<ConstantVector>(V)) { + isa<ConstantUnion>(C) || isa<ConstantVector>(V)) { Code = bitc::CST_CODE_AGGREGATE; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(C->getOperand(i))); @@ -1510,16 +1529,50 @@ enum { DarwinBCHeaderSize = 5*4 }; +/// isARMTriplet - Return true if the triplet looks like: +/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. +static bool isARMTriplet(const std::string &TT) { + size_t Pos = 0; + size_t Size = TT.size(); + if (Size >= 6 && + TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' && + TT[3] == 'm' && TT[4] == 'b') + Pos = 5; + else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm') + Pos = 3; + else + return false; + + if (TT[Pos] == '-') + return true; + else if (TT[Pos] == 'v') { + if (Size >= Pos+4 && + TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2') + return true; + else if (Size >= Pos+4 && + TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e') + return true; + } else + return false; + while (++Pos < Size && TT[Pos] != '-') { + if (!isdigit(TT[Pos])) + return false; + } + return true; +} + static void EmitDarwinBCHeader(BitstreamWriter &Stream, const std::string &TT) { unsigned CPUType = ~0U; - // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a - // magic number from /usr/include/mach/machine.h. It is ok to reproduce the + // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, + // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic + // number from /usr/include/mach/machine.h. It is ok to reproduce the // specific constants here because they are implicitly part of the Darwin ABI. enum { DARWIN_CPU_ARCH_ABI64 = 0x01000000, DARWIN_CPU_TYPE_X86 = 7, + DARWIN_CPU_TYPE_ARM = 12, DARWIN_CPU_TYPE_POWERPC = 18 }; @@ -1532,6 +1585,8 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, CPUType = DARWIN_CPU_TYPE_POWERPC; else if (TT.find("powerpc64-") == 0) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; + else if (isARMTriplet(TT)) + CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. unsigned BCOffset = DarwinBCHeaderSize; diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 595497f..aa4c3af 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -27,7 +27,7 @@ static bool isSingleValueType(const std::pair<const llvm::Type*, } static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) { - return isa<IntegerType>(V.first->getType()); + return V.first->getType()->isIntegerTy(); } static bool CompareByFrequency(const std::pair<const llvm::Type*, @@ -39,8 +39,6 @@ static bool CompareByFrequency(const std::pair<const llvm::Type*, /// ValueEnumerator - Enumerate module-level information. ValueEnumerator::ValueEnumerator(const Module *M) { - InstructionCount = 0; - // Enumerate the global variables. 
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) @@ -377,6 +375,7 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { void ValueEnumerator::incorporateFunction(const Function &F) { + InstructionCount = 0; NumModuleValues = Values.size(); // Adding function arguments to the value table. diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk new file mode 100644 index 0000000..9fa2ecd --- /dev/null +++ b/lib/CodeGen/Android.mk @@ -0,0 +1,99 @@ +LOCAL_PATH:= $(call my-dir) + +codegen_SRC_FILES := \ + AggressiveAntiDepBreaker.cpp \ + BranchFolding.cpp \ + CalcSpillWeights.cpp \ + CodePlacementOpt.cpp \ + CriticalAntiDepBreaker.cpp \ + DeadMachineInstructionElim.cpp \ + DwarfEHPrepare.cpp \ + ELFCodeEmitter.cpp \ + ELFWriter.cpp \ + ExactHazardRecognizer.cpp \ + GCMetadata.cpp \ + GCMetadataPrinter.cpp \ + GCStrategy.cpp \ + IfConversion.cpp \ + IntrinsicLowering.cpp \ + LLVMTargetMachine.cpp \ + LatencyPriorityQueue.cpp \ + LiveInterval.cpp \ + LiveIntervalAnalysis.cpp \ + LiveStackAnalysis.cpp \ + LiveVariables.cpp \ + LowerSubregs.cpp \ + MachineBasicBlock.cpp \ + MachineCSE.cpp \ + MachineDominators.cpp \ + MachineFunction.cpp \ + MachineFunctionAnalysis.cpp \ + MachineFunctionPass.cpp \ + MachineInstr.cpp \ + MachineLICM.cpp \ + MachineLoopInfo.cpp \ + MachineModuleInfo.cpp \ + MachineModuleInfoImpls.cpp \ + MachinePassRegistry.cpp \ + MachineRegisterInfo.cpp \ + MachineSSAUpdater.cpp \ + MachineSink.cpp \ + MachineVerifier.cpp \ + ObjectCodeEmitter.cpp \ + OcamlGC.cpp \ + OptimizeExts.cpp \ + OptimizePHIs.cpp \ + PHIElimination.cpp \ + Passes.cpp \ + PostRASchedulerList.cpp \ + PreAllocSplitting.cpp \ + ProcessImplicitDefs.cpp \ + PrologEpilogInserter.cpp \ + PseudoSourceValue.cpp \ + RegAllocLinearScan.cpp \ + RegAllocLocal.cpp \ + RegAllocPBQP.cpp \ + RegisterCoalescer.cpp \ + RegisterScavenging.cpp \ + ScheduleDAG.cpp \ + ScheduleDAGEmit.cpp \ + ScheduleDAGInstrs.cpp \ + ScheduleDAGPrinter.cpp \ + ShadowStackGC.cpp \ + ShrinkWrapping.cpp \ + SimpleRegisterCoalescing.cpp \ + SjLjEHPrepare.cpp \ + SlotIndexes.cpp \ + Spiller.cpp \ + StackProtector.cpp \ + StackSlotColoring.cpp \ + StrongPHIElimination.cpp \ + TailDuplication.cpp \ + TargetInstrInfoImpl.cpp \ + TargetLoweringObjectFileImpl.cpp \ + TwoAddressInstructionPass.cpp \ + UnreachableBlockElim.cpp \ + VirtRegMap.cpp \ + VirtRegRewriter.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_SRC_FILES) +LOCAL_MODULE:= libLLVMCodeGen + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_SRC_FILES) +LOCAL_MODULE:= libLLVMCodeGen + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk new file mode 100644 index 0000000..62601f0 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/Android.mk @@ -0,0 +1,31 @@ +LOCAL_PATH := $(call my-dir) + +codegen_asmprinter_SRC_FILES := \ + AsmPrinter.cpp \ + DIE.cpp \ + DwarfDebug.cpp \ + DwarfException.cpp \ + DwarfLabel.cpp \ + DwarfPrinter.cpp \ + DwarfWriter.cpp \ + OcamlGCPrinter.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES) +LOCAL_MODULE:= 
libLLVMAsmPrinter + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES) +LOCAL_MODULE:= libLLVMAsmPrinter + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index fc08384..bbeb026 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include <cerrno> +#include <ctype.h> using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -917,11 +918,10 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV, if (NumBits == 0) return; // No need to emit alignment. - unsigned FillValue = 0; if (getCurrentSection()->getKind().isText()) - FillValue = MAI->getTextAlignFillValue(); - - OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0); + OutStreamer.EmitCodeAlignment(1 << NumBits); + else + OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0); } /// LowerConstant - Lower the specified LLVM Constant to an MCExpr. @@ -1717,7 +1717,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { } // Print the main label for the block. - if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) { + if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { if (VerboseAsm) { // NOTE: Want this comment at start of line. O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':'; @@ -1764,6 +1764,39 @@ void AsmPrinter::printOffset(int64_t Offset) const { O << Offset; } +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool AsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Otherwise, check the last instruction. + const MachineInstr &LastInst = Pred->back(); + return !LastInst.getDesc().isBarrier(); +} + + + GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { if (!S->usesMetadata()) return 0; diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 349e0ac..63360c0 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -313,6 +313,7 @@ void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const { D->EmitSectionOffset(Label.getTag(), Section.getTag(), Label.getNumber(), Section.getNumber(), IsSmall, IsEH, UseSet); + D->getAsm()->O << '\n'; // FIXME: Necesssary? } /// SizeOf - Determine size of delta value in bytes. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5093dd9..5ad1e5e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -238,7 +238,18 @@ public: LIndex = DSI; } } - setLastInsn(LastInsn); + + unsigned CurrentLastInsnIndex = 0; + if (const MachineInstr *CL = getLastInsn()) + CurrentLastInsnIndex = MIIndexMap[CL]; + unsigned FIndex = MIIndexMap[getFirstInsn()]; + + // Set LastInsn as the last instruction for this scope only if + // it follows + // 1) this scope's first instruction and + // 2) current last instruction for this scope, if any. + if (LIndex >= CurrentLastInsnIndex && LIndex >= FIndex) + setLastInsn(LastInsn); } #ifndef NDEBUG @@ -1166,7 +1177,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { return SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + // Constructors and operators for anonymous aggregates do not have names. + if (!SP.getName().empty()) + addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index b6801dc..2b08ba4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -50,26 +50,6 @@ DwarfException::~DwarfException() { delete ExceptionTimer; } -/// SizeOfEncodedValue - Return the size of the encoding in bytes. -unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) { - if (Encoding == dwarf::DW_EH_PE_omit) - return 0; - - switch (Encoding & 0x07) { - case dwarf::DW_EH_PE_absptr: - return TD->getPointerSize(); - case dwarf::DW_EH_PE_udata2: - return 2; - case dwarf::DW_EH_PE_udata4: - return 4; - case dwarf::DW_EH_PE_udata8: - return 8; - } - - assert(0 && "Invalid encoded value."); - return 0; -} - /// CreateLabelDiff - Emit a label and subtract it from the expression we /// already have. This is equivalent to emitting "foo - .", but we have to emit /// the label for "." directly. @@ -100,7 +80,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { TD->getPointerSize() : -TD->getPointerSize(); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - + // Begin eh frame section. Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); @@ -128,30 +108,16 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { // The personality presence indicates that language specific information will // show up in the eh frame. Find out how we are supposed to lower the // personality function reference: - const MCExpr *PersonalityRef = 0; - bool IsPersonalityIndirect = false, IsPersonalityPCRel = false; - if (PersonalityFn) { - // FIXME: HANDLE STATIC CODEGEN MODEL HERE. - - // In non-static mode, ask the object file how to represent this reference. 
- PersonalityRef = - TLOF.getSymbolForDwarfGlobalReference(PersonalityFn, Asm->Mang, - Asm->MMI, - IsPersonalityIndirect, - IsPersonalityPCRel); - } - - unsigned PerEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - if (IsPersonalityIndirect) - PerEncoding |= dwarf::DW_EH_PE_indirect; - unsigned LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - unsigned FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); char Augmentation[6] = { 0 }; unsigned AugmentationSize = 0; char *APtr = Augmentation + 1; - if (PersonalityRef) { + if (PersonalityFn) { // There is a personality function. *APtr++ = 'P'; AugmentationSize += 1 + SizeOfEncodedValue(PerEncoding); @@ -181,20 +147,19 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) { Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true)); EOL("CIE Return Address Column"); - EmitULEB128(AugmentationSize, "Augmentation Size"); - EmitEncodingByte(PerEncoding, "Personality"); - - // If there is a personality, we need to indicate the function's location. - if (PersonalityRef) { - if (!IsPersonalityPCRel) - PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr", - Index); + if (Augmentation[0]) { + EmitULEB128(AugmentationSize, "Augmentation Size"); - O << MAI->getData32bitsDirective() << *PersonalityRef; - EOL("Personality"); - - EmitEncodingByte(LSDAEncoding, "LSDA"); - EmitEncodingByte(FDEEncoding, "FDE"); + // If there is a personality, we need to indicate the function's location. + if (PersonalityFn) { + EmitEncodingByte(PerEncoding, "Personality"); + EmitReference(PersonalityFn, PerEncoding); + EOL("Personality"); + } + if (UsesLSDA[Index]) + EmitEncodingByte(LSDAEncoding, "LSDA"); + if (FDEEncoding != dwarf::DW_EH_PE_absptr) + EmitEncodingByte(FDEEncoding, "FDE"); } // Indicate locations of general callee saved registers in frame. @@ -216,8 +181,12 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { "Should not emit 'available externally' functions at all"); const Function *TheFunc = EHFrameInfo.function; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getEHFrameSection()); + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(); + + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); // Externally visible entry into the functions eh frame info. If the // corresponding function is static, this should not be externally visible. @@ -255,7 +224,8 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { // EH frame header. 
EmitDifference("eh_frame_end", EHFrameInfo.Number, - "eh_frame_begin", EHFrameInfo.Number, true); + "eh_frame_begin", EHFrameInfo.Number, + true); EOL("Length of Frame Information Entry"); EmitLabel("eh_frame_begin", EHFrameInfo.Number); @@ -266,33 +236,23 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { EOL("FDE CIE offset"); - EmitReference("eh_func_begin", EHFrameInfo.Number, true, true); + EmitReference("eh_func_begin", EHFrameInfo.Number, FDEEncoding); EOL("FDE initial location"); EmitDifference("eh_func_end", EHFrameInfo.Number, - "eh_func_begin", EHFrameInfo.Number, true); + "eh_func_begin", EHFrameInfo.Number, + SizeOfEncodedValue(FDEEncoding) == 4); EOL("FDE address range"); // If there is a personality and landing pads then point to the language // specific data area in the exception table. if (MMI->getPersonalities()[0] != NULL) { + unsigned Size = SizeOfEncodedValue(LSDAEncoding); - if (Asm->TM.getLSDAEncoding() != DwarfLSDAEncoding::EightByte) { - EmitULEB128(4, "Augmentation size"); - - if (EHFrameInfo.hasLandingPads) - EmitReference("exception", EHFrameInfo.Number, true, true); - else - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); - } else { - EmitULEB128(TD->getPointerSize(), "Augmentation size"); - - if (EHFrameInfo.hasLandingPads) { - EmitReference("exception", EHFrameInfo.Number, true, false); - } else { - Asm->OutStreamer.EmitIntValue(0, TD->getPointerSize(), - 0/*addrspace*/); - } - } + EmitULEB128(Size, "Augmentation size"); + if (EHFrameInfo.hasLandingPads) + EmitReference("exception", EHFrameInfo.Number, LSDAEncoding); + else + Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/); EOL("Language Specific Data Area"); } else { @@ -407,20 +367,22 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, if (NumShared < TypeIds.size()) { unsigned SizeAction = 0; - ActionEntry *PrevAction = 0; + unsigned PrevAction = (unsigned)-1; if (NumShared) { const unsigned SizePrevIds = PrevLPI->TypeIds.size(); assert(Actions.size()); - PrevAction = &Actions.back(); - SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); + PrevAction = Actions.size() - 1; + SizeAction = + MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) + + MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); for (unsigned j = NumShared; j != SizePrevIds; ++j) { + assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); SizeAction -= - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - SizeAction += -PrevAction->NextAction; - PrevAction = PrevAction->Previous; + MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction += -Actions[PrevAction].NextAction; + PrevAction = Actions[PrevAction].Previous; } } @@ -437,7 +399,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; Actions.push_back(Action); - PrevAction = &Actions.back(); + PrevAction = Actions.size() - 1; } // Record the first action of the landing pad site. @@ -447,7 +409,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, // Information used when created the call-site table. The action record // field of the call site record is the offset of the first associated // action record, relative to the start of the actions table. 
This value is - // biased by 1 (1 in dicating the start of the actions table), and 0 + // biased by 1 (1 indicating the start of the actions table), and 0 // indicates that there are no actions. FirstActions.push_back(FirstAction); @@ -648,8 +610,7 @@ void DwarfException::EmitExceptionTable() { // landing pad site. SmallVector<ActionEntry, 32> Actions; SmallVector<unsigned, 64> FirstActions; - unsigned SizeActions = ComputeActionsTable(LandingPads, Actions, - FirstActions); + unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions); // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). Ordinary calls do not, so appropriate @@ -677,29 +638,29 @@ void DwarfException::EmitExceptionTable() { const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4); bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; - unsigned SizeSites; + unsigned CallSiteTableLength; if (IsSJLJ) - SizeSites = 0; + CallSiteTableLength = 0; else - SizeSites = CallSites.size() * + CallSiteTableLength = CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { - SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); + CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action); if (IsSJLJ) - SizeSites += MCAsmInfo::getULEB128Size(i); + CallSiteTableLength += MCAsmInfo::getULEB128Size(i); } // Type infos. const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); - unsigned TTypeFormat; + unsigned TTypeEncoding; unsigned TypeFormatSize; if (!HaveTTData) { // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say // that we're omitting that bit. - TTypeFormat = dwarf::DW_EH_PE_omit; + TTypeEncoding = dwarf::DW_EH_PE_omit; TypeFormatSize = SizeOfEncodedValue(dwarf::DW_EH_PE_absptr); } else { // Okay, we have actual filters or typeinfos to emit. As such, we need to @@ -729,21 +690,28 @@ void DwarfException::EmitExceptionTable() { // somewhere. This predicate should be moved to a shared location that is // in target-independent code. // - if (LSDASection->getKind().isWriteable() || - Asm->TM.getRelocationModel() == Reloc::Static) - TTypeFormat = dwarf::DW_EH_PE_absptr; - else - TTypeFormat = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | - dwarf::DW_EH_PE_sdata4; - - TypeFormatSize = SizeOfEncodedValue(TTypeFormat); + TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); + TypeFormatSize = SizeOfEncodedValue(TTypeEncoding); } // Begin the exception table. Asm->OutStreamer.SwitchSection(LSDASection); Asm->EmitAlignment(2, 0, 0, false); + // Emit the LSDA. O << "GCC_except_table" << SubprogramCount << ":\n"; + EmitLabel("exception", SubprogramCount); + + if (IsSJLJ) { + SmallString<16> LSDAName; + raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << + "_LSDA_" << Asm->getFunctionNumber(); + O << LSDAName.str() << ":\n"; + } + + // Emit the LSDA header. + EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + EmitEncodingByte(TTypeEncoding, "@TType"); // The type infos need to be aligned. GCC does this by inserting padding just // before the type infos. However, this changes the size of the exception @@ -752,7 +720,7 @@ void DwarfException::EmitExceptionTable() { // So by increasing the size by inserting padding, you may increase the number // of bytes used for writing the size. 
If it increases, say by one byte, then // you now need to output one less byte of padding to get the type infos - // aligned. However this decreases the size of the exception table. This + // aligned. However this decreases the size of the exception table. This // changes the value you have to output for the exception table size. Due to // the variable length encoding, the number of bytes used for writing the // length may decrease. If so, you then have to increase the amount of @@ -761,41 +729,35 @@ void DwarfException::EmitExceptionTable() { // We chose another solution: don't output padding inside the table like GCC // does, instead output it before the table. unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; - unsigned TyOffset = sizeof(int8_t) + // Call site format - MCAsmInfo::getULEB128Size(SizeSites) + // Call site table length - SizeSites + SizeActions + SizeTypes; - unsigned TotalSize = sizeof(int8_t) + // LPStart format - sizeof(int8_t) + // TType format - (HaveTTData ? - MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset - TyOffset; + unsigned CallSiteTableLengthSize = + MCAsmInfo::getULEB128Size(CallSiteTableLength); + unsigned TTypeBaseOffset = + sizeof(int8_t) + // Call site format + CallSiteTableLengthSize + // Call site table length size + CallSiteTableLength + // Call site table length + SizeActions + // Actions size + SizeTypes; + unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset); + unsigned TotalSize = + sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size + TTypeBaseOffset; // TType base offset unsigned SizeAlign = (4 - TotalSize) & 3; - for (unsigned i = 0; i != SizeAlign; ++i) { - Asm->EmitInt8(0); - EOL("Padding"); - } - - EmitLabel("exception", SubprogramCount); - - if (IsSJLJ) { - SmallString<16> LSDAName; - raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << - "_LSDA_" << Asm->getFunctionNumber(); - O << LSDAName.str() << ":\n"; + if (HaveTTData) { + // Account for any extra padding that will be added to the call site table + // length. + EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign); + SizeAlign = 0; } - // Emit the header. - EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); - EmitEncodingByte(TTypeFormat, "@TType"); - - if (HaveTTData) - EmitULEB128(TyOffset, "@TType base offset"); - // SjLj Exception handling if (IsSJLJ) { EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - EmitULEB128(SizeSites, "Call site table length"); + + // Add extra padding if it wasn't added to the TType base offset. + EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); // Emit the landing pad site information. unsigned idx = 0; @@ -836,7 +798,9 @@ void DwarfException::EmitExceptionTable() { // Emit the landing pad call site table. EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); - EmitULEB128(SizeSites, "Call site table length"); + + // Add extra padding if it wasn't added to the TType base offset. + EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); for (SmallVectorImpl<CallSiteEntry>::const_iterator I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { @@ -906,23 +870,23 @@ void DwarfException::EmitExceptionTable() { } // Emit the Catch TypeInfos. 
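The size arithmetic in the exception-table hunk above is worth a small worked note: TTypeBaseOffset counts everything between the @TType base-offset field and the type-info table, and the padding needed to 4-byte align the type infos is folded into the ULEB128 fields (via their new padding argument) rather than emitted as separate bytes. A minimal sketch of just the padding calculation, separate from the patch (the helper name is illustrative):

// Bytes of padding needed to bring TotalSize up to a 4-byte boundary.
static unsigned padTo4(unsigned TotalSize) {
  return (4 - TotalSize) & 3;   // e.g. padTo4(5) == 3, padTo4(8) == 0
}

That value is what the hunk passes as the extra-padding argument of EmitULEB128 for the @TType base offset (or, failing that, for the call-site table length), so the alignment bytes ride along inside an over-long ULEB128 encoding.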
- if (TypeInfos.size() != 0) EOL("-- Catch TypeInfos --"); + if (!TypeInfos.empty()) EOL("-- Catch TypeInfos --"); for (std::vector<GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { const GlobalVariable *GV = *I; - PrintRelDirective(); if (GV) { - O << *Asm->GetGlobalValueSymbol(GV); + EmitReference(GV, TTypeEncoding); EOL("TypeInfo"); } else { + PrintRelDirective(TTypeEncoding); O << "0x0"; EOL(""); } } // Emit the Exception Specifications. - if (FilterIds.size() != 0) EOL("-- Filter IDs --"); + if (!FilterIds.empty()) EOL("-- Filter IDs --"); for (std::vector<unsigned>::const_iterator I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { unsigned TypeID = *I; diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index 06033a1..3db1a00 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -76,9 +76,6 @@ class DwarfException : public DwarfPrinter { /// ExceptionTimer - Timer for the Dwarf exception writer. Timer *ExceptionTimer; - /// SizeOfEncodedValue - Return the size of the encoding in bytes. - unsigned SizeOfEncodedValue(unsigned Encoding); - /// EmitCIE - Emit a Common Information Entry (CIE). This holds information /// that is shared among many Frame Description Entries. There is at least /// one CIE in every non-empty .debug_frame section. @@ -135,7 +132,7 @@ class DwarfException : public DwarfPrinter { struct ActionEntry { int ValueForTypeID; // The value to write - may not be equal to the type id. int NextAction; - struct ActionEntry *Previous; + unsigned Previous; }; /// CallSiteEntry - Structure describing an entry in the call-site table. diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index 415390b..28ff0eb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // Emit general DWARF directives. -// +// //===----------------------------------------------------------------------===// #include "DwarfPrinter.h" @@ -18,13 +18,17 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, @@ -33,6 +37,26 @@ DwarfPrinter::DwarfPrinter(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T, RI(Asm->TM.getRegisterInfo()), M(NULL), MF(NULL), MMI(NULL), SubprogramCount(0), Flavor(flavor), SetCounter(1) {} +/// SizeOfEncodedValue - Return the size of the encoding in bytes. 
+unsigned DwarfPrinter::SizeOfEncodedValue(unsigned Encoding) const { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + case dwarf::DW_EH_PE_absptr: + return TD->getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } + + assert(0 && "Invalid encoded value."); + return 0; +} + void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const { if (isInSection && MAI->getDwarfSectionOffsetDirective()) O << MAI->getDwarfSectionOffsetDirective(); @@ -42,6 +66,14 @@ void DwarfPrinter::PrintRelDirective(bool Force32Bit, bool isInSection) const { O << MAI->getData64bitsDirective(); } +void DwarfPrinter::PrintRelDirective(unsigned Encoding) const { + unsigned Size = SizeOfEncodedValue(Encoding); + assert((Size == 4 || Size == 8) && "Do not support other types or rels!"); + + O << (Size == 4 ? + MAI->getData32bitsDirective() : MAI->getData64bitsDirective()); +} + /// EOL - Print a newline character to asm stream. If a comment is present /// then it will be printed first. Comments should not contain '\n'. void DwarfPrinter::EOL(const Twine &Comment) const { @@ -127,29 +159,35 @@ void DwarfPrinter::EmitSLEB128(int Value, const char *Desc) const { Value >>= 7; IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; if (IsMore) Byte |= 0x80; - Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); } while (IsMore); } /// EmitULEB128 - emit the specified signed leb128 value. -void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc) const { +void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc, + unsigned PadTo) const { if (Asm->VerboseAsm && Desc) Asm->OutStreamer.AddComment(Desc); - if (MAI->hasLEB128()) { + if (MAI->hasLEB128() && PadTo == 0) { O << "\t.uleb128\t" << Value; Asm->OutStreamer.AddBlankLine(); return; } - // If we don't have .uleb128, emit as .bytes. + // If we don't have .uleb128 or we want to emit padding, emit as .bytes. do { unsigned char Byte = static_cast<unsigned char>(Value & 0x7f); Value >>= 7; - if (Value) Byte |= 0x80; + if (Value || PadTo != 0) Byte |= 0x80; Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); } while (Value); + + if (PadTo) { + if (PadTo > 1) + Asm->OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/); + Asm->OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/); + } } @@ -195,6 +233,31 @@ void DwarfPrinter::EmitReference(const MCSymbol *Sym, bool IsPCRelative, if (IsPCRelative) O << "-" << MAI->getPCSymbol(); } +void DwarfPrinter::EmitReference(const char *Tag, unsigned Number, + unsigned Encoding) const { + SmallString<64> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << Tag << Number; + + MCSymbol *Sym = Asm->OutContext.GetOrCreateSymbol(Name.str()); + EmitReference(Sym, Encoding); +} + +void DwarfPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + PrintRelDirective(Encoding); + O << *TLOF.getSymbolForDwarfReference(Sym, Asm->MMI, Encoding);; +} + +void DwarfPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + PrintRelDirective(Encoding); + O << *TLOF.getSymbolForDwarfGlobalReference(GV, Asm->Mang, + Asm->MMI, Encoding);; +} + /// EmitDifference - Emit the difference between two labels. If this assembler /// supports .set, we emit a .set of a temporary and then use it in the .word. 
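To make the padding behaviour added to EmitULEB128 above concrete: extra bytes are emitted as 0x80 continuation bytes terminated by a single 0x00, so the padded field still decodes to the same value. A self-contained sketch, separate from the patch (encodeULEB128 is an illustrative stand-in, not the DwarfPrinter method; ExtraBytes plays the role of the new padding argument):

#include <cstdint>
#include <vector>

// Encode Value as ULEB128, then append ExtraBytes of padding: continuation
// bytes (0x80) followed by a final 0x00, so the field decodes unchanged.
static std::vector<uint8_t> encodeULEB128(uint64_t Value,
                                          unsigned ExtraBytes = 0) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0 || ExtraBytes != 0)
      Byte |= 0x80;                           // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  for (unsigned i = 0; i < ExtraBytes; ++i)
    Out.push_back(i + 1 == ExtraBytes ? 0x00 : 0x80);
  return Out;
}
// encodeULEB128(624485) == {0xE5, 0x8E, 0x26}
// encodeULEB128(1, 3)   == {0x81, 0x80, 0x80, 0x00}  -- still decodes to 1

This is what lets the exception-table emitter absorb alignment padding into a length field whose own encoded size would otherwise feed back into the alignment calculation.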
void DwarfPrinter::EmitDifference(const char *TagHi, unsigned NumberHi, @@ -248,7 +311,6 @@ void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section, PrintRelDirective(IsSmall); PrintLabelName("set", SetCounter, Flavor); ++SetCounter; - O << "\n"; } else { PrintRelDirective(IsSmall, true); PrintLabelName(Label, LabelNumber); @@ -257,7 +319,6 @@ void DwarfPrinter::EmitSectionOffset(const char* Label, const char* Section, O << "-"; PrintLabelName(Section, SectionNumber); } - O << "\n"; } } diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h index 69d9c27..bd715f2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h @@ -28,6 +28,7 @@ class Module; class MCAsmInfo; class TargetData; class TargetRegisterInfo; +class GlobalValue; class MCSymbol; class Twine; @@ -85,6 +86,10 @@ public: const MCAsmInfo *getMCAsmInfo() const { return MAI; } const TargetData *getTargetData() const { return TD; } + /// SizeOfEncodedValue - Return the size of the encoding in bytes. + unsigned SizeOfEncodedValue(unsigned Encoding) const; + + void PrintRelDirective(unsigned Encoding) const; void PrintRelDirective(bool Force32Bit = false, bool isInSection = false) const; @@ -106,7 +111,8 @@ public: void EmitSLEB128(int Value, const char *Desc) const; /// EmitULEB128 - emit the specified unsigned leb128 value. - void EmitULEB128(unsigned Value, const char *Desc = 0) const; + void EmitULEB128(unsigned Value, const char *Desc = 0, + unsigned PadTo = 0) const; /// PrintLabelName - Print label name in form used by Dwarf writer. @@ -140,6 +146,10 @@ public: void EmitReference(const MCSymbol *Sym, bool IsPCRelative = false, bool Force32Bit = false) const; + void EmitReference(const char *Tag, unsigned Number, unsigned Encoding) const; + void EmitReference(const MCSymbol *Sym, unsigned Encoding) const; + void EmitReference(const GlobalValue *GV, unsigned Encoding) const; + /// EmitDifference - Emit the difference between two labels. void EmitDifference(const DWLabel &LabelHi, const DWLabel &LabelLo, bool IsSmall = false) { diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index 3531ed6..a9502fd 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -22,6 +22,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include <ctype.h> using namespace llvm; namespace { diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index faf4d95..d94729a 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -334,7 +334,9 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, unsigned TailLen = 0; while (I1 != MBB1->begin() && I2 != MBB2->begin()) { --I1; --I2; - if (!I1->isIdenticalTo(I2) || + // Don't merge debugging pseudos. + if (I1->isDebugValue() || I2->isDebugValue() || + !I1->isIdenticalTo(I2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. 
This is untenable because normal compiler @@ -412,6 +414,8 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { unsigned Time = 0; for (; I != E; ++I) { + if (I->isDebugValue()) + continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall()) Time += 10; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 9fcbea9..d385b86 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen LiveVariables.cpp LowerSubregs.cpp MachineBasicBlock.cpp + MachineCSE.cpp MachineDominators.cpp MachineFunction.cpp MachineFunctionAnalysis.cpp @@ -39,6 +40,7 @@ add_llvm_library(LLVMCodeGen ObjectCodeEmitter.cpp OcamlGC.cpp OptimizeExts.cpp + OptimizePHIs.cpp PHIElimination.cpp Passes.cpp PostRASchedulerList.cpp @@ -66,6 +68,7 @@ add_llvm_library(LLVMCodeGen StrongPHIElimination.cpp TailDuplication.cpp TargetInstrInfoImpl.cpp + TargetLoweringObjectFileImpl.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 2bedd04..a328d0e 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -131,10 +131,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { if (Hint.first || Hint.second) li.weight *= 1.01F; - // Divide the weight of the interval by its size. This encourages - // spilling of intervals that are large and have few uses, and - // discourages spilling of small intervals with many uses. - li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM; + lis->normalizeSpillWeight(li); } } diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 05a57d4..3ff2a04 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -102,22 +102,23 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { // Conservatively ignore EH landing pads. if (MBB->isLandingPad()) return false; - // Ignore blocks which look like they might have EH-related control flow. - // At the time of this writing, there are blocks which AnalyzeBranch - // thinks end in single uncoditional branches, yet which have two CFG - // successors. Code in this file is not prepared to reason about such things. - if (!MBB->empty() && MBB->back().isEHLabel()) - return false; - // Aggressively handle return blocks and similar constructs. if (MBB->succ_empty()) return true; // Ask the target's AnalyzeBranch if it can handle this block. MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector<MachineOperand, 4> Cond; - // Make the terminator is understood. + // Make sure the terminator is understood. if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) return false; + // Ignore blocks which look like they might have EH-related control flow. + // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't + // recognize the possibility of a control transfer through an unwind. + // Such blocks contain EH_LABEL instructions, however they may be in the + // middle of the block. Instead of searching for them, just check to see + // if the CFG disagrees with AnalyzeBranch. + if (1u + !Cond.empty() != MBB->succ_size()) + return false; // Make sure we have the option of reversing the condition. 
if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) return false; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 056e2d5..7d3de89 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -119,6 +119,8 @@ void CriticalAntiDepBreaker::FinishBlock() { void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex) { + if (MI->isDebugValue()) + return; assert(Count < InsertPosIndex && "Instruction index out of expected range!"); // Any register which was defined within the previous scheduling region @@ -409,6 +411,8 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits, for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; // Check if this instruction has a dependence on the critical path that // is an anti-dependence that we may be able to break. If it is, set diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index b0cb24d..d69c995 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -55,7 +55,7 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() { bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, SawStore, 0)) + if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI()) return false; // Examine each operand. @@ -64,8 +64,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg) ? - LivePhysRegs[Reg] : !MRI->use_empty(Reg)) { - // This def has a use. Don't delete the instruction! + LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) { + // This def has a non-debug use. Don't delete the instruction! return false; } } @@ -111,23 +111,31 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MIE = MBB->rend(); MII != MIE; ) { MachineInstr *MI = &*MII; - if (MI->isDebugValue()) { - // Don't delete the DBG_VALUE itself, but if its Value operand is - // a vreg and this is the only use, substitute an undef operand; - // the former operand will then be deleted normally. - if (MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { - unsigned Reg = MI->getOperand(0).getReg(); - MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); - assert(I != MRI->use_end()); - if (++I == MRI->use_end()) - // only one use, which must be this DBG_VALUE. - MI->getOperand(0).setReg(0U); - } - } - // If the instruction is dead, delete it! if (isDead(MI)) { DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I!=E; I=nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand& Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI==MI) + continue; + assert(Use.isDebug()); + UseMI->getOperand(0).setReg(0U); + } + } AnyChanges = true; MI->eraseFromParent(); ++NumDeletes; diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 9997a48..87ab7ef 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -155,7 +155,7 @@ void IntrinsicLowering::AddPrototypes(Module &M) { /// LowerBSWAP - Emit the code to lower bswap of V before the specified /// instruction IP. static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isInteger() && "Can't bswap a non-integer type!"); + assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); @@ -251,7 +251,7 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { /// LowerCTPOP - Emit the code to lower ctpop of V before the specified /// instruction IP. static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { - assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!"); + assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!"); static const uint64_t MaskValues[6] = { 0x5555555555555555ULL, 0x3333333333333333ULL, diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 40e0150..5e88865 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Pass.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/Passes.h" @@ -66,6 +67,9 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); +static cl::opt<bool> EnableMachineCSE("enable-machine-cse", cl::Hidden, + cl::desc("Enable Machine CSE")); + static cl::opt<cl::boolOrDefault> AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), cl::init(cl::BOU_UNSET)); @@ -114,9 +118,10 @@ LLVMTargetMachine::setCodeModelForStatic() { bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel)) + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify)) return true; OwningPtr<MCContext> Context(new MCContext()); @@ -140,7 +145,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this); + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); if (MCE == 0) return true; @@ -192,12 +197,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { // Make sure the code model is set. setCodeModelForJIT(); // Add common CodeGen passes. - if (addCommonCodeGenPasses(PM, OptLevel)) + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify)) return true; addCodeEmitter(PM, OptLevel, JCE); @@ -206,6 +212,12 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return false; // success! } +static void printNoVerify(PassManagerBase &PM, + const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); +} + static void printAndVerify(PassManagerBase &PM, const char *Banner, bool allowDoubleDefs = false) { @@ -220,13 +232,19 @@ static void printAndVerify(PassManagerBase &PM, /// emitting to assembly files or machine code output. /// bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { // Standard LLVM-Level Passes. + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + // Optionally, tun split-GEPs and no-load GVN. if (EnableSplitGEPGVN) { PM.add(createGEPSplitterPass()); - PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true)); + PM.add(createGVNPass(/*NoLoads=*/true)); } // Run loop strength reduction before anything else. @@ -273,6 +291,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, "*** Final LLVM Code input to ISel ***\n", &dbgs())); + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + // Standard Lower-Level Passes. // Set up a MachineFunction for the rest of CodeGen to work on. @@ -291,6 +314,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After Instruction Selection", /* allowDoubleDefs= */ true); + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); // Delete dead machine instructions regardless of optimization level. PM.add(createDeadMachineInstructionElimPass()); @@ -301,6 +328,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); + if (EnableMachineCSE) + PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); printAndVerify(PM, "After MachineLICM and MachineSinking", @@ -355,13 +384,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // Branch folding must be run after regalloc and prolog/epilog insertion. if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); - printAndVerify(PM, "After BranchFolding"); + printNoVerify(PM, "After BranchFolding"); } // Tail duplication. 
if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { PM.add(createTailDuplicatePass(false)); - printAndVerify(PM, "After TailDuplicate"); + printNoVerify(PM, "After TailDuplicate"); } PM.add(createGCMachineCodeAnalysisPass()); @@ -371,11 +400,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { PM.add(createCodePlacementOptPass()); - printAndVerify(PM, "After CodePlacementOpt"); + printNoVerify(PM, "After CodePlacementOpt"); } if (addPreEmitPass(PM, OptLevel)) - printAndVerify(PM, "After PreEmit passes"); + printNoVerify(PM, "After PreEmit passes"); return false; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 432409a..ccda66f 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -329,24 +329,43 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, DEBUG(dbgs() << " +" << NewLR); interval.addRange(NewLR); - // Iterate over all of the blocks that the variable is completely - // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the - // live interval. - for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), - E = vi.AliveBlocks.end(); I != E; ++I) { - MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I); - LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR); + bool PHIJoin = lv_->isPHIJoin(interval.reg); + + if (PHIJoin) { + // A phi join register is killed at the end of the MBB and revived as a new + // valno in the killing blocks. + assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); + DEBUG(dbgs() << " phi-join"); + ValNo->addKill(indexes_->getTerminatorGap(mbb)); + ValNo->setHasPHIKill(true); + } else { + // Iterate over all of the blocks that the variable is completely + // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the + // live interval. + for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), + E = vi.AliveBlocks.end(); I != E; ++I) { + MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I); + LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo); + interval.addRange(LR); + DEBUG(dbgs() << " +" << LR); + } } // Finally, this virtual register is live from the start of any killing // block to the 'use' slot of the killing instruction. for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; - SlotIndex killIdx = - getInstructionIndex(Kill).getDefIndex(); - LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo); + SlotIndex Start = getMBBStartIdx(Kill->getParent()); + SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex(); + + // Create interval with one of a NEW value number. Note that this value + // number isn't actually defined by an instruction, weird huh? :) + if (PHIJoin) { + ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false, + VNInfoAllocator); + ValNo->setIsPHIDef(true); + } + LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); ValNo->addKill(killIdx); DEBUG(dbgs() << " +" << LR); @@ -409,48 +428,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.print(dbgs(), tri_); }); } else { - // Otherwise, this must be because of phi elimination. If this is the - // first redefinition of the vreg that we have seen, go back and change - // the live range in the PHI block to be a different value number. 
- if (interval.containsOneValue()) { - - VNInfo *VNI = interval.getValNumInfo(0); - // Phi elimination may have reused the register for multiple identical - // phi nodes. There will be a kill per phi. Remove the old ranges that - // we now know have an incorrect number. - for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) { - MachineInstr *Killer = vi.Kills[ki]; - SlotIndex Start = getMBBStartIdx(Killer->getParent()); - SlotIndex End = getInstructionIndex(Killer).getDefIndex(); - DEBUG({ - dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: "; - interval.print(dbgs(), tri_); - }); - interval.removeRange(Start, End); - - // Replace the interval with one of a NEW value number. Note that - // this value number isn't actually defined by an instruction, weird - // huh? :) - LiveRange LR(Start, End, - interval.getNextValue(SlotIndex(Start, true), - 0, false, VNInfoAllocator)); - LR.valno->setIsPHIDef(true); - interval.addRange(LR); - LR.valno->addKill(End); - } - - MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def); - VNI->addKill(indexes_->getTerminatorGap(killMBB)); - VNI->setHasPHIKill(true); - DEBUG({ - dbgs() << " RESULT: "; - interval.print(dbgs(), tri_); - }); - } - + assert(lv_->isPHIJoin(interval.reg) && "Multiply defined register"); // In the case of PHI elimination, each variable definition is only // live until the end of the block. We've already taken care of the // rest of the live range. + SlotIndex defIndex = MIIdx.getDefIndex(); if (MO.isEarlyClobber()) defIndex = MIIdx.getUseIndex(); @@ -468,7 +450,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, interval.addRange(LR); ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); - DEBUG(dbgs() << " +" << LR); + DEBUG(dbgs() << " phi-join +" << LR); } } @@ -613,6 +595,9 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, while (mi != E) { if (mi->isDebugValue()) { ++mi; + if (mi != E && !mi->isDebugValue()) { + baseIndex = indexes_->getNextNonNullIndex(baseIndex); + } continue; } if (mi->killsRegister(interval.reg, tri_)) { @@ -1355,11 +1340,9 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, MachineBasicBlock *MBB = MI->getParent(); if (ImpUse && MI != ReMatDefMI) { - // Re-matting an instruction with virtual register use. Update the - // register interval's spill weight to HUGE_VALF to prevent it from - // being spilled. - LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.weight = HUGE_VALF; + // Re-matting an instruction with virtual register use. Prevent interval + // from being spilled. + getInterval(ImpUse).markNotSpillable(); } unsigned MBBId = MBB->getNumber(); @@ -1411,7 +1394,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, LiveInterval &nI = getOrCreateInterval(NewVReg); if (!TrySplit) { // The spill weight is now infinity as it cannot be spilled again. - nI.weight = HUGE_VALF; + nI.markNotSpillable(); continue; } @@ -1559,6 +1542,28 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, } } +float +LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { + // Limit the loop depth ridiculousness. + if (loopDepth > 200) + loopDepth = 200; + + // The loop depth is used to roughly estimate the number of times the + // instruction is executed. Something like 10^d is simple, but will quickly + // overflow a float. This expression behaves like 10^d for small d, but is + // more tempered for large d. 
At d=200 we get 6.7e33 which leaves a bit of + // headroom before overflow. + float lc = powf(1 + (100.0f / (loopDepth+10)), (float)loopDepth); + + return (isDef + isUse) * lc; +} + +void +LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) { + for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) + normalizeSpillWeight(*NewLIs[i]); +} + std::vector<LiveInterval*> LiveIntervals:: addIntervalsForSpillsFast(const LiveInterval &li, const MachineLoopInfo *loopInfo, @@ -1567,8 +1572,7 @@ addIntervalsForSpillsFast(const LiveInterval &li, std::vector<LiveInterval*> added; - assert(li.weight != HUGE_VALF && - "attempt to spill already spilled interval!"); + assert(li.isSpillable() && "attempt to spill already spilled interval!"); DEBUG({ dbgs() << "\t\t\t\tadding intervals for spills for interval: "; @@ -1604,10 +1608,7 @@ addIntervalsForSpillsFast(const LiveInterval &li, // create a new register for this spill LiveInterval &nI = getOrCreateInterval(NewVReg); - - // the spill weight is now infinity as it - // cannot be spilled again - nI.weight = HUGE_VALF; + nI.markNotSpillable(); // Rewrite register operands to use the new vreg. for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(), @@ -1661,8 +1662,7 @@ addIntervalsForSpills(const LiveInterval &li, if (EnableFastSpilling) return addIntervalsForSpillsFast(li, loopInfo, vrm); - assert(li.weight != HUGE_VALF && - "attempt to spill already spilled interval!"); + assert(li.isSpillable() && "attempt to spill already spilled interval!"); DEBUG({ dbgs() << "\t\t\t\tadding intervals for spills for interval: "; @@ -1736,6 +1736,7 @@ addIntervalsForSpills(const LiveInterval &li, } handleSpilledImpDefs(li, vrm, rc, NewLIs); + normalizeSpillWeights(NewLIs); return NewLIs; } @@ -1811,6 +1812,7 @@ addIntervalsForSpills(const LiveInterval &li, // Insert spills / restores if we are splitting. if (!TrySplit) { handleSpilledImpDefs(li, vrm, rc, NewLIs); + normalizeSpillWeights(NewLIs); return NewLIs; } @@ -1927,11 +1929,10 @@ addIntervalsForSpills(const LiveInterval &li, unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); if (ImpUse) { // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI and update the register - // interval's spill weight to HUGE_VALF to prevent it from being - // spilled. + // register as an implicit use on the use MI and mark the register + // interval as unspillable. LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.weight = HUGE_VALF; + ImpLi.markNotSpillable(); MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); } } @@ -1970,6 +1971,7 @@ addIntervalsForSpills(const LiveInterval &li, } handleSpilledImpDefs(li, vrm, rc, RetNewLIs); + normalizeSpillWeights(RetNewLIs); return RetNewLIs; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 8a124dc..519990e 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -365,27 +365,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { } } - if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { - if (LastPartDef) - // The last partial def kills the register. - LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, - true/*IsImp*/, true/*IsKill*/)); - else { - MachineOperand *MO = - LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI); - bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; - // If the last reference is the last def, then it's not used at all. 
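On the new LiveIntervals::getSpillWeight above: the loop-depth factor powf(1 + 100/(loopDepth+10), loopDepth) is chosen to track 10^d for shallow nests while staying finite for absurd depths (the comment's 6.7e33 at d=200). A quick standalone check of that claim, separate from the patch:

#include <cmath>
#include <cstdio>
#include <initializer_list>

int main() {
  for (unsigned d : {1u, 2u, 5u, 10u, 200u}) {
    // Same expression as getSpillWeight, computed in double for printing.
    double lc = std::pow(1.0 + 100.0 / (d + 10), (double)d);
    std::printf("d=%3u  lc=%11.4g  10^d=%11.4g\n",
                d, lc, std::pow(10.0, (double)d));
  }
  return 0;
}

For small d the factor is of the same order as 10^d; by d=200 it has only reached about 6.7e33 instead of 10^200, which is the float headroom the comment refers to.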
- // That is, unless we are currently processing the last reference itself. - LastRefOrPartRef->addRegisterDead(Reg, TRI, true); - if (NeedEC) { - // If we are adding a subreg def and the superreg def is marked early - // clobber, add an early clobber marker to the subreg def. - MO = LastRefOrPartRef->findRegisterDefOperand(Reg); - if (MO) - MO->setIsEarlyClobber(); - } - } - } else if (!PhysRegUse[Reg]) { + if (!PhysRegUse[Reg]) { // Partial uses. Mark register def dead and add implicit def of // sub-registers which are used. // EAX<dead> = op AL<imp-def> @@ -419,6 +399,26 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.erase(*SS); } + } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { + if (LastPartDef) + // The last partial def kills the register. + LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/, true/*IsKill*/)); + else { + MachineOperand *MO = + LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI); + bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; + // If the last reference is the last def, then it's not used at all. + // That is, unless we are currently processing the last reference itself. + LastRefOrPartRef->addRegisterDead(Reg, TRI, true); + if (NeedEC) { + // If we are adding a subreg def and the superreg def is marked early + // clobber, add an early clobber marker to the subreg def. + MO = LastRefOrPartRef->findRegisterDefOperand(Reg); + if (MO) + MO->setIsEarlyClobber(); + } + } } else LastRefOrPartRef->addRegisterKilled(Reg, TRI, true); return true; @@ -510,6 +510,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + PHIJoins.clear(); /// Get some space for a respectable number of registers. VirtRegInfo.resize(64); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 655a0bf..64134ce 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -143,36 +143,6 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { return I; } -/// isOnlyReachableViaFallthough - Return true if this basic block has -/// exactly one predecessor and the control transfer mechanism between -/// the predecessor and this block is a fall-through. -bool MachineBasicBlock::isOnlyReachableByFallthrough() const { - // If this is a landing pad, it isn't a fall through. If it has no preds, - // then nothing falls through to it. - if (isLandingPad() || pred_empty()) - return false; - - // If there isn't exactly one predecessor, it can't be a fall through. - const_pred_iterator PI = pred_begin(), PI2 = PI; - ++PI2; - if (PI2 != pred_end()) - return false; - - // The predecessor has to be immediately before this block. - const MachineBasicBlock *Pred = *PI; - - if (!Pred->isLayoutSuccessor(this)) - return false; - - // If the block is completely empty, then it definitely does fall through. - if (Pred->empty()) - return true; - - // Otherwise, check the last instruction. 
- const MachineInstr &LastInst = Pred->back(); - return !LastInst.getDesc().isBarrier(); -} - void MachineBasicBlock::dump() const { print(dbgs()); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp new file mode 100644 index 0000000..b376e3d --- /dev/null +++ b/lib/CodeGen/MachineCSE.cpp @@ -0,0 +1,268 @@ +//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs global common subexpression elimination on machine +// instructions using a scoped hash table based value numbering scheme. It +// must be run while the machine function is still in SSA form. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-cse" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumCSEs, "Number of common subexpression eliminated"); + +namespace { + class MachineCSE : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + MachineDominatorTree *DT; + AliasAnalysis *AA; + public: + static char ID; // Pass identification + MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AliasAnalysis>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + + private: + unsigned CurrVN; + ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT; + SmallVector<MachineInstr*, 64> Exps; + + bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E); + bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB); + bool isCSECandidate(MachineInstr *MI); + bool ProcessBlock(MachineDomTreeNode *Node); + }; +} // end anonymous namespace + +char MachineCSE::ID = 0; +static RegisterPass<MachineCSE> +X("machine-cse", "Machine Common Subexpression Elimination"); + +FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } + +bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB) { + bool Changed = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (!MRI->hasOneUse(Reg)) + // Only coalesce single use copies. This ensure the copy will be + // deleted. 
+ continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getParent() != MBB) + continue; + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + TargetRegisterInfo::isVirtualRegister(SrcReg) && + !SrcSubIdx && !DstSubIdx) { + MO.setReg(SrcReg); + DefMI->eraseFromParent(); + ++NumCoalesces; + Changed = true; + } + } + + return Changed; +} + +bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) { + unsigned LookAheadLeft = 5; + while (LookAheadLeft--) { + if (I == E) + // Reached end of block, register is obviously dead. + return true; + + if (I->isDebugValue()) + continue; + bool SeenDef = false; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (!TRI->regsOverlap(MO.getReg(), Reg)) + continue; + if (MO.isUse()) + return false; + SeenDef = true; + } + if (SeenDef) + // See a def of Reg (or an alias) before encountering any use, it's + // trivially dead. + return true; + ++I; + } + return false; +} + +bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){ + unsigned PhysDef = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) + // Can't touch anything to read a physical register. + return true; + if (MO.isDead()) + // If the def is dead, it's ok. + continue; + // Ok, this is a physical register def that's not marked "dead". That's + // common since this pass is run before livevariables. We can scan + // forward a few instructions and check if it is obviously dead. + if (PhysDef) + // Multiple physical register defs. These are rare, forget about it. + return true; + PhysDef = Reg; + } + } + + if (PhysDef) { + MachineBasicBlock::iterator I = MI; I = llvm::next(I); + if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end())) + return true; + } + return false; +} + +bool MachineCSE::isCSECandidate(MachineInstr *MI) { + // Ignore copies or instructions that read / write physical registers + // (except for dead defs of physical registers). + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) || + MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg()) + return false; + + // Ignore stuff that we obviously can't move. + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + TID.hasUnmodeledSideEffects()) + return false; + + if (TID.mayLoad()) { + // Okay, this instruction does a load. As a refinement, we allow the target + // to decide whether the loaded value is actually a constant. If so, we can + // actually use it as a load. + if (!MI->isInvariantLoad(AA)) + // FIXME: we should be able to hoist loads with no other side effects if + // there are no other instructions which can change memory in this loop. + // This is a trivial form of alias analysis. 
+ return false; + } + return true; +} + +bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) { + bool Changed = false; + + ScopedHashTableScope<MachineInstr*, unsigned, + MachineInstrExpressionTrait> VNTS(VNT); + MachineBasicBlock *MBB = Node->getBlock(); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (!isCSECandidate(MI)) + continue; + + bool FoundCSE = VNT.count(MI); + if (!FoundCSE) { + // Look for trivial copy coalescing opportunities. + if (PerformTrivialCoalescing(MI, MBB)) + FoundCSE = VNT.count(MI); + } + // FIXME: commute commutable instructions? + + // If the instruction defines a physical register and the value *may* be + // used, then it's not safe to replace it with a common subexpression. + if (FoundCSE && hasLivePhysRegDefUse(MI, MBB)) + FoundCSE = false; + + if (!FoundCSE) { + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + continue; + } + + // Found a common subexpression, eliminate it. + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + DEBUG(dbgs() << "Examining: " << *MI); + DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); + unsigned NumDefs = MI->getDesc().getNumDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned OldReg = MO.getReg(); + unsigned NewReg = CSMI->getOperand(i).getReg(); + if (OldReg == NewReg) + continue; + assert(TargetRegisterInfo::isVirtualRegister(OldReg) && + TargetRegisterInfo::isVirtualRegister(NewReg) && + "Do not CSE physical register defs!"); + MRI->replaceRegWith(OldReg, NewReg); + --NumDefs; + } + MI->eraseFromParent(); + ++NumCSEs; + } + + // Recursively call ProcessBlock with childred. 
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + for (unsigned i = 0, e = Children.size(); i != e; ++i) + Changed |= ProcessBlock(Children[i]); + + return Changed; +} + +bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AliasAnalysis>(); + return ProcessBlock(DT->getRootNode()); +} diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index f141c56..4377d5b 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -95,6 +95,9 @@ MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, MFInfo = 0; FrameInfo = new (Allocator.Allocate<MachineFrameInfo>()) MachineFrameInfo(*TM.getFrameInfo()); + if (Fn->hasFnAttr(Attribute::StackAlignment)) + FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( + Fn->getAttributes().getFnAttributes())); ConstantPool = new (Allocator.Allocate<MachineConstantPool>()) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getFunctionAlignment(F); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index df61c74..e23670d 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -18,6 +18,7 @@ #include "llvm/Type.h" #include "llvm/Value.h" #include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -305,7 +306,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f, int64_t o, uint64_t s, unsigned int a) : Offset(o), Size(s), V(v), - Flags((f & 7) | ((Log2_32(a) + 1) << 3)) { + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) { assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); } @@ -327,7 +328,8 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { if (MMO->getBaseAlignment() >= getBaseAlignment()) { // Update the alignment value. - Flags = (Flags & 7) | ((Log2_32(MMO->getBaseAlignment()) + 1) << 3); + Flags = (Flags & ((1 << MOMaxBits) - 1)) | + ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); // Also update the base and offset, because the new alignment may // not be applicable with the old ones. V = MMO->getValue(); @@ -700,6 +702,35 @@ void MachineInstr::addMemOperand(MachineFunction &MF, MemRefsEnd = NewMemRefsEnd; } +bool MachineInstr::isIdenticalTo(const MachineInstr *Other, + MICheckType Check) const { + // If opcodes or number of operands are not the same then the two + // instructions are obviously not identical. + if (Other->getOpcode() != getOpcode() || + Other->getNumOperands() != getNumOperands()) + return false; + + // Check operands to make sure they match. + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + const MachineOperand &OMO = Other->getOperand(i); + // Clients may or may not want to ignore defs when testing for equality. + // For example, machine CSE pass only cares about finding common + // subexpressions, so it's safe to ignore virtual register defs. 
+ if (Check != CheckDefs && MO.isReg() && MO.isDef()) { + if (Check == IgnoreDefs) + continue; + // Check == IgnoreVRegDefs + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + TargetRegisterInfo::isPhysicalRegister(OMO.getReg())) + if (MO.getReg() != OMO.getReg()) + return false; + } else if (!MO.isIdenticalTo(OMO)) + return false; + } + return true; +} + /// removeFromParent - This method unlinks 'this' from the containing basic /// block, and returns it, but does not delete it. MachineInstr *MachineInstr::removeFromParent() { @@ -958,8 +989,8 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) { /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, - bool &SawStore, - AliasAnalysis *AA) const { + AliasAnalysis *AA, + bool &SawStore) const { // Ignore stuff that we obviously can't move. if (TID->mayStore() || TID->isCall()) { SawStore = true; @@ -984,11 +1015,11 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, /// isSafeToReMat - Return true if it's safe to rematerialize the specified /// instruction which defined the specified register instead of copying it. bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - unsigned DstReg, - AliasAnalysis *AA) const { + AliasAnalysis *AA, + unsigned DstReg) const { bool SawStore = false; if (!TII->isTriviallyReMaterializable(this, AA) || - !isSafeToMove(TII, SawStore, AA)) + !isSafeToMove(TII, AA, SawStore)) return false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -1324,3 +1355,48 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg, true /*IsDef*/, true /*IsImp*/)); } + +unsigned +MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { + unsigned Hash = MI->getOpcode() * 37; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + uint64_t Key = (uint64_t)MO.getType() << 32; + switch (MO.getType()) { + default: break; + case MachineOperand::MO_Register: + if (MO.isDef() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. 
+ Key |= MO.getReg(); + break; + case MachineOperand::MO_Immediate: + Key |= MO.getImm(); + break; + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + Key |= MO.getIndex(); + break; + case MachineOperand::MO_MachineBasicBlock: + Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB()); + break; + case MachineOperand::MO_GlobalAddress: + Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal()); + break; + case MachineOperand::MO_BlockAddress: + Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress()); + break; + } + Key += ~(Key << 32); + Key ^= (Key >> 22); + Key += ~(Key << 13); + Key ^= (Key >> 8); + Key += (Key << 3); + Key ^= (Key >> 15); + Key += ~(Key << 27); + Key ^= (Key >> 31); + Hash = (unsigned)Key + Hash * 37; + } + return Hash; +} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 92c84f3..0361694 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -252,32 +252,6 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { return false; } - DEBUG({ - dbgs() << "--- Checking if we can hoist " << I; - if (I.getDesc().getImplicitUses()) { - dbgs() << " * Instruction has implicit uses:\n"; - - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - for (const unsigned *ImpUses = I.getDesc().getImplicitUses(); - *ImpUses; ++ImpUses) - dbgs() << " -> " << TRI->getName(*ImpUses) << "\n"; - } - - if (I.getDesc().getImplicitDefs()) { - dbgs() << " * Instruction has implicit defines:\n"; - - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs(); - *ImpDefs; ++ImpDefs) - dbgs() << " -> " << TRI->getName(*ImpDefs) << "\n"; - } - }); - - if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) { - DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n"); - return false; - } - // The instruction is loop invariant if all of its operands are. for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { const MachineOperand &MO = I.getOperand(i); @@ -311,6 +285,10 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. return false; + } else if (CurLoop->getHeader()->isLiveIn(Reg)) { + // If the reg is live into the loop, we can't hoist an instruction + // which would clobber it. + return false; } } @@ -467,7 +445,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - if (TII->isIdentical(MI, PrevMI, RegInfo)) + if (TII->produceSameValue(MI, PrevMI)) return PrevMI; } return 0; @@ -480,9 +458,20 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); + + // Replace virtual registers defined by MI by their counterparts defined + // by Dup. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDef()) + + // Physical registers may not differ here. 
+ assert((!MO.isReg() || MO.getReg() == 0 || + !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + MO.getReg() == Dup->getOperand(i).getReg()) && + "Instructions with different phys regs are not identical!"); + + if (MO.isReg() && MO.isDef() && + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg()); } MI->eraseFromParent(); diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 8378906..39d2c75 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -22,7 +22,7 @@ using namespace llvm; // Out of line virtual method. void MachineModuleInfoMachO::Anchor() {} - +void MachineModuleInfoELF::Anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { const MCSymbol *LHSS = @@ -34,10 +34,11 @@ static int SortSymbolPair(const void *LHS, const void *RHS) { /// GetSortedStubs - Return the entries from a DenseMap in a deterministic /// sorted orer. -MachineModuleInfoMachO::SymbolListTy -MachineModuleInfoMachO::GetSortedStubs(const DenseMap<MCSymbol*, - MCSymbol*> &Map) { - MachineModuleInfoMachO::SymbolListTy List(Map.begin(), Map.end()); +MachineModuleInfoImpl::SymbolListTy +MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*, + MCSymbol*> &Map) { + MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); + if (!List.empty()) qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); return List; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index b31973e..d9ab677 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -116,6 +116,19 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { return 0; } +bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { + use_iterator UI = use_begin(RegNo); + if (UI == use_end()) + return false; + return ++UI == use_end(); +} + +bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { + use_nodbg_iterator UI = use_nodbg_begin(RegNo); + if (UI == use_nodbg_end()) + return false; + return ++UI == use_nodbg_end(); +} #ifndef NDEBUG void MachineRegisterInfo::dumpUses(unsigned Reg) const { diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index c391576..e47ba7c 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -72,8 +72,13 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); - for (MachineRegisterInfo::use_iterator I = RegInfo->use_begin(Reg), - E = RegInfo->use_end(); I != E; ++I) { + // Ignoring debug uses is necessary so debug info doesn't affect the code. + // This may leave a referencing dbg_value in the original block, before + // the definition of the vreg. Dwarf generator handles this although the + // user might not get the right info at runtime. + for (MachineRegisterInfo::use_nodbg_iterator I = + RegInfo->use_nodbg_begin(Reg), + E = RegInfo->use_nodbg_end(); I != E; ++I) { // Determine the block of the use. 
MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); @@ -135,7 +140,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { ProcessedBegin = I == MBB.begin(); if (!ProcessedBegin) --I; - + + if (MI->isDebugValue()) + continue; + if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; @@ -149,7 +157,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check if it's safe to move the instruction. - if (!MI->isSafeToMove(TII, SawStore, AA)) + if (!MI->isSafeToMove(TII, AA, SawStore)) return false; // FIXME: This should include support for sinking instructions within the diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp new file mode 100644 index 0000000..2717d4d --- /dev/null +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -0,0 +1,189 @@ +//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass optimizes machine instruction PHIs to take advantage of +// opportunities created during DAG legalization. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "phi-opt" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); +STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); + +namespace { + class OptimizePHIs : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef SmallPtrSet<MachineInstr*, 16> InstrSet; + typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator; + + bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, + InstrSet &PHIsInCycle); + bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle); + bool OptimizeBB(MachineBasicBlock &MBB); + }; +} + +char OptimizePHIs::ID = 0; +static RegisterPass<OptimizePHIs> +X("opt-phis", "Optimize machine instruction PHIs"); + +FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); } + +bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + MRI = &Fn.getRegInfo(); + TII = Fn.getTarget().getInstrInfo(); + + // Find dead PHI cycles and PHI cycles that can be replaced by a single + // value. InstCombine does these optimizations, but DAG legalization may + // introduce new opportunities, e.g., when i64 values are split up for + // 32-bit targets. 
+ bool Changed = false; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= OptimizeBB(*I); + + return Changed; +} + +/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands +/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// SingleValReg is zero on entry, it is set to the register with the single +/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that +/// have been scanned. +bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, + unsigned &SingleValReg, + InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. + unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx; + if (SrcMI && + TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) && + SrcSubIdx == 0 && DstSubIdx == 0 && + TargetRegisterInfo::isVirtualRegister(MvSrcReg)) + SrcMI = MRI->getVRegDef(MvSrcReg); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. + if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// IsDeadPHICycle - Check if the register defined by a PHI is only used by +/// other PHIs in a cycle. +bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "PHI destination is not a virtual register"); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), + E = MRI->use_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + return false; + } + + return true; +} + +/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by +/// a single value. +bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + // Check for single-value PHI cycles. + unsigned SingleValReg = 0; + InstrSet PHIsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && + SingleValReg != 0) { + MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + continue; + } + + // Check for dead PHI cycles. 
+ PHIsInCycle.clear(); + if (IsDeadPHICycle(MI, PHIsInCycle)) { + for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); + PI != PE; ++PI) { + MachineInstr *PhiMI = *PI; + if (&*MII == PhiMI) + ++MII; + PhiMI->eraseFromParent(); + } + ++NumDeadPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h index b48f548..bd18b52 100644 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -18,7 +18,6 @@ #include "Graph.h" #include "Solution.h" -#include "llvm/Support/raw_ostream.h" #include <vector> #include <limits> @@ -230,7 +229,7 @@ namespace PBQP { } /// \brief Apply rule R1. - /// @param nItr Node iterator for node to apply R1 to. + /// @param xnItr Node iterator for node to apply R1 to. /// /// Node will be automatically pushed to the solver stack. void applyR1(Graph::NodeItr xnItr) { @@ -278,7 +277,7 @@ namespace PBQP { } /// \brief Apply rule R2. - /// @param nItr Node iterator for node to apply R2 to. + /// @param xnItr Node iterator for node to apply R2 to. /// /// Node will be automatically pushed to the solver stack. void applyR2(Graph::NodeItr xnItr) { @@ -494,14 +493,23 @@ namespace PBQP { bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) { + const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity(); + Matrix &edgeCosts = g.getEdgeCosts(eItr); Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)), &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr)); for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { - PBQPNum rowMin = edgeCosts.getRowMin(r); + PBQPNum rowMin = infinity; + + for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { + if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin) + rowMin = edgeCosts[r][c]; + } + uCosts[r] += rowMin; - if (rowMin != std::numeric_limits<PBQPNum>::infinity()) { + + if (rowMin != infinity) { edgeCosts.subFromRow(r, rowMin); } else { @@ -510,9 +518,16 @@ namespace PBQP { } for (unsigned c = 0; c < edgeCosts.getCols(); ++c) { - PBQPNum colMin = edgeCosts.getColMin(c); + PBQPNum colMin = infinity; + + for (unsigned r = 0; r < edgeCosts.getRows(); ++r) { + if (uCosts[r] != infinity && edgeCosts[r][c] < colMin) + colMin = edgeCosts[r][c]; + } + vCosts[c] += colMin; - if (colMin != std::numeric_limits<PBQPNum>::infinity()) { + + if (colMin != infinity) { edgeCosts.subFromCol(c, colMin); } else { diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h index c09ad74..30d34d9 100644 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -128,14 +128,7 @@ namespace PBQP { /// selected for heuristic reduction instead. bool shouldOptimallyReduce(Graph::NodeItr nItr) { if (getSolver().getSolverDegree(nItr) < 3) { - if (getGraph().getNodeCosts(nItr)[0] != - std::numeric_limits<PBQPNum>::infinity()) { - return true; - } - // Otherwise we have an infinite spill cost node. 
- initializeNode(nItr); - NodeData &nd = getHeuristicNodeData(nItr); - return nd.isAllocable; + return true; } // else return false; diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index b740c68..8bbe0a7 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -55,8 +55,6 @@ void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { MRI = &Fn.getRegInfo(); - PHIDefs.clear(); - PHIKills.clear(); bool Changed = false; // Split critical edges to help the coalescer @@ -215,10 +213,6 @@ void llvm::PHIElimination::LowerAtomicPHINode( TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC); } - // Record PHI def. - assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?"); - PHIDefs[DestReg] = &MBB; - // Update live variable information if there is any. LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>(); if (LV) { @@ -229,6 +223,7 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Increment use count of the newly created virtual register. VI.NumUses++; + LV->setPHIJoin(IncomingReg); // When we are reusing the incoming register, it may already have been // killed in this block. The old kill will also have been inserted at @@ -276,9 +271,6 @@ void llvm::PHIElimination::LowerAtomicPHINode( // path the PHI. MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); - // Record the kill. - PHIKills[SrcReg].insert(&opBlock); - // If source is defined by an implicit def, there is no need to insert a // copy. MachineInstr *DefMI = MRI->getVRegDef(SrcReg); @@ -451,34 +443,3 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, return NMBB; } - -unsigned -PHIElimination::PHINodeTraits::getHashValue(const MachineInstr *MI) { - if (!MI || MI==getEmptyKey() || MI==getTombstoneKey()) - return DenseMapInfo<MachineInstr*>::getHashValue(MI); - unsigned hash = 0; - for (unsigned ni = 1, ne = MI->getNumOperands(); ni != ne; ni += 2) - hash = hash*37 + DenseMapInfo<BBVRegPair>:: - getHashValue(BBVRegPair(MI->getOperand(ni+1).getMBB()->getNumber(), - MI->getOperand(ni).getReg())); - return hash; -} - -bool PHIElimination::PHINodeTraits::isEqual(const MachineInstr *LHS, - const MachineInstr *RHS) { - const MachineInstr *EmptyKey = getEmptyKey(); - const MachineInstr *TombstoneKey = getTombstoneKey(); - if (!LHS || !RHS || LHS==EmptyKey || RHS==EmptyKey || - LHS==TombstoneKey || RHS==TombstoneKey) - return LHS==RHS; - - unsigned ne = LHS->getNumOperands(); - if (ne != RHS->getNumOperands()) - return false; - // Ignore operand 0, the defined register. - for (unsigned ni = 1; ni != ne; ni += 2) - if (LHS->getOperand(ni).getReg() != RHS->getOperand(ni).getReg() || - LHS->getOperand(ni+1).getMBB() != RHS->getOperand(ni+1).getMBB()) - return false; - return true; -} diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index 895aaa4..7dedf03 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -22,17 +22,8 @@ namespace llvm { /// Lower PHI instructions to copies. 
class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information - private: - - typedef SmallSet<MachineBasicBlock*, 4> PHIKillList; - typedef DenseMap<unsigned, PHIKillList> PHIKillMap; - typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap; public: - - typedef PHIKillList::iterator phi_kill_iterator; - typedef PHIKillList::const_iterator const_phi_kill_iterator; - static char ID; // Pass identification, replacement for typeid PHIElimination() : MachineFunctionPass(&ID) {} @@ -40,38 +31,6 @@ namespace llvm { virtual void getAnalysisUsage(AnalysisUsage &AU) const; - /// Return true if the given vreg was defined by a PHI intsr prior to - /// lowering. - bool hasPHIDef(unsigned vreg) const { - return PHIDefs.count(vreg); - } - - /// Returns the block in which the PHI instruction which defined the - /// given vreg used to reside. - MachineBasicBlock* getPHIDefBlock(unsigned vreg) { - PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg); - assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def."); - return phiDefItr->second; - } - - /// Returns true if the given vreg was killed by a PHI instr. - bool hasPHIKills(unsigned vreg) const { - return PHIKills.count(vreg); - } - - /// Returns an iterator over the BasicBlocks which contained PHI - /// kills of this register prior to lowering. - phi_kill_iterator phiKillsBegin(unsigned vreg) { - PHIKillMap::iterator phiKillItr = PHIKills.find(vreg); - assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills."); - return phiKillItr->second.begin(); - } - phi_kill_iterator phiKillsEnd(unsigned vreg) { - PHIKillMap::iterator phiKillItr = PHIKills.find(vreg); - assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills."); - return phiKillItr->second.end(); - } - private: /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions /// in predecessor basic blocks. @@ -109,12 +68,29 @@ namespace llvm { // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and // also after any exception handling labels: in landing pads execution // starts at the label, so any copies placed before it won't be executed! + // We also deal with DBG_VALUEs, which are a bit tricky: + // PHI + // DBG_VALUE + // LABEL + // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it + // needs to be annulled or, better, moved to follow the label, as well. + // PHI + // DBG_VALUE + // no label + // Here it is not a good idea to skip the DBG_VALUE. + // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and + // maximally stupid. MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { // Rather than assuming that EH labels come before other kinds of labels, // just skip all labels. - while (I != MBB.end() && (I->isPHI() || I->isLabel())) + while (I != MBB.end() && + (I->isPHI() || I->isLabel() || I->isDebugValue())) { + if (I->isDebugValue() && I->getNumOperands()==3 && + I->getOperand(0).isReg()) + I->getOperand(0).setReg(0U); ++I; + } return I; } @@ -122,21 +98,13 @@ namespace llvm { typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; VRegPHIUse VRegPHIUseCount; - PHIDefMap PHIDefs; - PHIKillMap PHIKills; // Defs of PHI sources which are implicit_def. SmallPtrSet<MachineInstr*, 4> ImpDefs; - // Lowered PHI nodes may be reused. We provide special DenseMap traits to - // match PHI nodes with identical arguments. 
- struct PHINodeTraits : public DenseMapInfo<MachineInstr*> { - static unsigned getHashValue(const MachineInstr *PtrVal); - static bool isEqual(const MachineInstr *LHS, const MachineInstr *RHS); - }; - // Map reusable lowered PHI node -> incoming join register. - typedef DenseMap<MachineInstr*, unsigned, PHINodeTraits> LoweredPHIMap; + typedef DenseMap<MachineInstr*, unsigned, + MachineInstrExpressionTrait> LoweredPHIMap; LoweredPHIMap LoweredPHIs; }; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index f67eb79..5ea2941 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -34,7 +34,7 @@ static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser<RegisterRegAlloc> > RegAlloc("regalloc", cl::init(&createLinearScanRegisterAllocator), - cl::desc("Register allocator to use: (default = linearscan)")); + cl::desc("Register allocator to use (default=linearscan)")); //===---------------------------------------------------------------------===// diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f43395f..424181c 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -460,6 +460,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); I != E; --Count) { MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; // Update liveness. Registers that are defed but not used in this // instruction are now dead. Mark register and all subregs as they diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index e3df2e4..d7179b3 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -205,10 +205,9 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Process each use instruction once. for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), UE = mri_->use_end(); UI != UE; ++UI) { - MachineInstr *RMI = &*UI; - MachineBasicBlock *RMBB = RMI->getParent(); - if (RMBB == MBB) + if (UI.getOperand().isUndef()) continue; + MachineInstr *RMI = &*UI; if (ModInsts.insert(RMI)) RUses.push_back(RMI); } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 036f59a..138e711 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -175,9 +175,10 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { MachineBasicBlock::iterator I = *i; // If call frames are not being included as part of the stack frame, and - // there is no dynamic allocation (therefore referencing frame slots off - // sp), leave the pseudo ops alone. We'll eliminate them later. - if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn)) + // the target doesn't indicate otherwise, remove the call frame pseudos + // here. The sub/add sp instruction pairs are still inserted, but we don't + // need to track the SP adjustment for frame index elimination. + if (RegInfo->canSimplifyCallFramePseudos(Fn)) RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } @@ -476,8 +477,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... MachineFrameInfo *FFI = Fn.getFrameInfo(); - unsigned MaxAlign = 1; - // Start at the beginning of the local area. // The Offset is the distance from the stack top in the direction // of stack growth -- so it's always nonnegative. 
@@ -517,9 +516,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { Offset += FFI->getObjectSize(i); unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -529,9 +525,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = FFI->getObjectAlignment(i); - // If the alignment of this object is greater than that of the stack, - // then increase the stack alignment to match. - MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; @@ -540,6 +533,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } } + unsigned MaxAlign = FFI->getMaxAlignment(); + // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); @@ -605,11 +600,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Update frame info to pretend that this is part of the stack... FFI->setStackSize(Offset - LocalAreaOffset); - - // Remember the required stack alignment in case targets need it to perform - // dynamic stack alignment. - if (MaxAlign > FFI->getMaxAlignment()) - FFI->setMaxAlignment(MaxAlign); } diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 7fb3e6e..5e86e5a 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -18,19 +18,38 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Mutex.h" #include <map> using namespace llvm; -static ManagedStatic<PseudoSourceValue[4]> PSVs; +namespace { +struct PSVGlobalsTy { + // PseudoSourceValues are immutable so don't need locking. + const PseudoSourceValue PSVs[4]; + sys::Mutex Lock; // Guards FSValues, but not the values inside it. 
+ std::map<int, const PseudoSourceValue *> FSValues; + + PSVGlobalsTy() : PSVs() {} + ~PSVGlobalsTy() { + for (std::map<int, const PseudoSourceValue *>::iterator + I = FSValues.begin(), E = FSValues.end(); I != E; ++I) { + delete I->second; + } + } +}; + +static ManagedStatic<PSVGlobalsTy> PSVGlobals; + +} // anonymous namespace const PseudoSourceValue *PseudoSourceValue::getStack() -{ return &(*PSVs)[0]; } +{ return &PSVGlobals->PSVs[0]; } const PseudoSourceValue *PseudoSourceValue::getGOT() -{ return &(*PSVs)[1]; } +{ return &PSVGlobals->PSVs[1]; } const PseudoSourceValue *PseudoSourceValue::getJumpTable() -{ return &(*PSVs)[2]; } +{ return &PSVGlobals->PSVs[2]; } const PseudoSourceValue *PseudoSourceValue::getConstantPool() -{ return &(*PSVs)[3]; } +{ return &PSVGlobals->PSVs[3]; } static const char *const PSVNames[] = { "Stack", @@ -48,13 +67,13 @@ PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : Subclass) {} void PseudoSourceValue::printCustom(raw_ostream &O) const { - O << PSVNames[this - *PSVs]; + O << PSVNames[this - PSVGlobals->PSVs]; } -static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues; - const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) { - const PseudoSourceValue *&V = (*FSValues)[FI]; + PSVGlobalsTy &PG = *PSVGlobals; + sys::ScopedLock locked(PG.Lock); + const PseudoSourceValue *&V = PG.FSValues[FI]; if (!V) V = new FixedStackPseudoSourceValue(FI); return V; diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 8e44a57..5c5a394 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -334,10 +334,6 @@ namespace { SmallVector<unsigned, 256> &inactiveCounts, bool SkipDGRegs); - /// assignVirt2StackSlot - assigns this virtual register to a - /// stack slot. returns the stack slot - int assignVirt2StackSlot(unsigned virtReg); - void ComputeRelatedRegClasses(); template <typename ItTy> diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 4d2e3a3..04303cf 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -490,9 +490,12 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, // If the virtual register is already available, just update the instruction // and return. if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - MarkPhysRegRecentlyUsed(PR); // Already have this value available! MI->getOperand(OpNum).setReg(PR); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + if (!MI->isDebugValue()) { + // Do not do these for DBG_VALUE as they can affect codegen. + MarkPhysRegRecentlyUsed(PR); // Already have this value available! + getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); + } return MI; } @@ -609,6 +612,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { + if (I->isDebugValue()) + continue; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { MachineOperand& MO = I->getOperand(i); // Uses don't trigger any flags, but we need to save @@ -691,7 +696,13 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { bool usedOutsideBlock = isPhysReg ? 
false : UsedInMultipleBlocks.test(MO.getReg() - TargetRegisterInfo::FirstVirtualRegister); - if (!isPhysReg && !usedOutsideBlock) + if (!isPhysReg && !usedOutsideBlock) { + // DBG_VALUE complicates this: if the only refs of a register outside + // this block are DBG_VALUE, we can't keep the reg live just for that, + // as it will cause the reg to be spilled at the end of this block when + // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that + // happens. + bool UsedByDebugValueOnly = false; for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), UE = MRI.reg_end(); UI != UE; ++UI) // Two cases: @@ -699,12 +710,26 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // - used in the same block before it is defined (loop) if (UI->getParent() != &MBB || (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) { + if (UI->isDebugValue()) { + UsedByDebugValueOnly = true; + continue; + } + // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. UsedInMultipleBlocks.set(MO.getReg() - TargetRegisterInfo::FirstVirtualRegister); usedOutsideBlock = true; + UsedByDebugValueOnly = false; break; } - + if (UsedByDebugValueOnly) + for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()), + UE = MRI.reg_end(); UI != UE; ++UI) + if (UI->isDebugValue() && + (UI->getParent() != &MBB || + (MO.isDef() && precedes(&*UI, MI)))) + UI.getOperand().setReg(0U); + } + // Physical registers and those that are not live-out of the block // are killed/dead at their last use/def within this block. if (isPhysReg || !usedOutsideBlock) { diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 2701faf..81cfd8f 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -57,7 +57,7 @@ using namespace llvm; static RegisterRegAlloc -registerPBQPRepAlloc("pbqp", "PBQP register allocator.", +registerPBQPRepAlloc("pbqp", "PBQP register allocator", llvm::createPBQPRegisterAllocator); static cl::opt<bool> @@ -867,10 +867,6 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) { // Find the vreg intervals in need of allocation. findVRegIntervalsToAlloc(); - // If there aren't any then we're done here. - if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty()) - return true; - // If there are non-empty intervals allocate them using pbqp. if (!vregIntervalsToAlloc.empty()) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 56dd533..badf34e 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -72,7 +72,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { } else { return V; } - assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!"); + assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); } while (1); } @@ -87,7 +87,7 @@ static const Value *getUnderlyingObject(const Value *V) { break; const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); // If that succeeded in finding a pointer, continue the search. 
- if (!isa<PointerType>(O->getType())) + if (!O->getType()->isPointerTy()) break; V = O; } while (1); diff --git a/lib/CodeGen/SelectionDAG/Android.mk b/lib/CodeGen/SelectionDAG/Android.mk new file mode 100644 index 0000000..eb15a18 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/Android.mk @@ -0,0 +1,48 @@ +LOCAL_PATH:= $(call my-dir) + +codegen_selectiondag_SRC_FILES := \ + CallingConvLower.cpp \ + DAGCombiner.cpp \ + FastISel.cpp \ + FunctionLoweringInfo.cpp \ + InstrEmitter.cpp \ + LegalizeDAG.cpp \ + LegalizeFloatTypes.cpp \ + LegalizeIntegerTypes.cpp \ + LegalizeTypes.cpp \ + LegalizeTypesGeneric.cpp \ + LegalizeVectorOps.cpp \ + LegalizeVectorTypes.cpp \ + ScheduleDAGFast.cpp \ + ScheduleDAGList.cpp \ + ScheduleDAGRRList.cpp \ + ScheduleDAGSDNodes.cpp \ + SelectionDAG.cpp \ + SelectionDAGBuilder.cpp \ + SelectionDAGISel.cpp \ + SelectionDAGPrinter.cpp \ + TargetLowering.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES) + +LOCAL_MODULE:= libLLVMSelectionDAG + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(codegen_selectiondag_SRC_FILES) + +LOCAL_MODULE:= libLLVMSelectionDAG + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9189e71..3be6b43 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1064,7 +1064,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { @@ -1136,7 +1136,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { @@ -1758,7 +1758,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); EVT VT = N1.getValueType(); - unsigned BitWidth = VT.getSizeInBits(); + unsigned BitWidth = VT.getScalarType().getSizeInBits(); // fold vector ops if (VT.isVector()) { @@ -1786,7 +1786,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); if (RAND.getNode() != 0) return RAND; - // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF + // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) @@ -1872,16 +1872,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. 
- unsigned BitWidth = N1.getValueSizeInBits(); + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getSizeInBits())) && + BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1894,16 +1895,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); // If we zero all the possible extended bits, then we can turn this into // a zextload if we are running before legalize or the operation is legal. - unsigned BitWidth = N1.getValueSizeInBits(); + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, - BitWidth - MemVT.getSizeInBits())) && + BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1935,7 +1937,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), LN0->getAlignment()); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -1970,7 +1973,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), Alignment); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + Alignment); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2021,13 +2025,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (ROR.getNode() != 0) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + // iff (c1 & c2) == 0. 
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, - N0.getOperand(0), N1), - DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); + if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) + return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + N0.getOperand(0), N1), + DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ @@ -2750,7 +2756,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { APInt KnownZero, KnownOne; - APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()); + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all @@ -3143,7 +3149,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3185,7 +3192,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3315,7 +3323,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3357,7 +3366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), @@ -3471,7 +3481,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), ExtLoad); @@ -3513,7 +3524,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { VT, LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), @@ -3636,10 +3648,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load = (ExtType == ISD::NON_EXTLOAD) ? 
DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - LN0->isVolatile(), NewAlign) + LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, - ExtVT, LN0->isVolatile(), NewAlign); + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + NewAlign); // Replace the old load's chain with the new load's chain. WorkListRemover DeadNodes(*this); @@ -3726,7 +3739,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3742,7 +3756,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -3826,7 +3841,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getSrcValue(), - LD1->getSrcValueOffset(), false, Align); + LD1->getSrcValueOffset(), false, false, Align); } return SDValue(); @@ -3896,7 +3911,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), OrigAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), @@ -4492,7 +4508,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), @@ -4640,7 +4657,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.DeleteNode(Trunc); } // Replace the uses of SRL with SETCC - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4648,6 +4666,56 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } } } + + // Transform br(xor(x, y)) -> br(x != y) + // Transform br(xor(xor(x,y), 1)) -> br (x == y) + if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { + SDNode *TheXor = N1.getNode(); + SDValue Op0 = TheXor->getOperand(0); + SDValue Op1 = TheXor->getOperand(1); + if (Op0.getOpcode() == Op1.getOpcode()) { + // Avoid missing important xor optimizations. 
+ SDValue Tmp = visitXOR(TheXor); + if (Tmp.getNode()) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + } + + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { + bool Equal = false; + if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) + if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } + + EVT SetCCVT = N1.getValueType(); + if (LegalTypes) + SetCCVT = TLI.getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT, + Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); + // Replace the uses of XOR with SETCC + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes); + removeFromWorkList(N1.getNode()); + DAG.DeleteNode(N1.getNode()); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + } + } return SDValue(); } @@ -4960,7 +5028,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align); } } @@ -4997,7 +5065,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - DEBUG(dbgs() << "\nReplacing.6 "; + DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); @@ -5042,7 +5110,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -5050,6 +5119,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); } @@ -5149,13 +5219,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), LD->getChain(), NewPtr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), NewAlign); + LD->isVolatile(), LD->isNonTemporal(), + NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), NewVal, NewPtr, ST->getSrcValue(), ST->getSrcValueOffset(), - false, NewAlign); + false, false, NewAlign); AddToWorkList(NewPtr.getNode()); AddToWorkList(NewLD.getNode()); @@ -5184,7 +5255,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), Align); + ST->isVolatile(), ST->isNonTemporal(), Align); } } @@ -5201,7 +5272,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), - 
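Editor's note on the BRCOND combine introduced above: it rewrites br(xor(x, y)) into br(setcc(x, y, ne)) and, when one operand is the constant 1 wrapping another xor, br(xor(xor(x, y), 1)) into br(setcc(x, y, eq)). The underlying fact is just that exclusive-or on i1 values is "not equal"; a minimal standalone check (plain C++, not the DAG code):

#include <cassert>

int main() {
  for (int x = 0; x <= 1; ++x)
    for (int y = 0; y <= 1; ++y) {
      assert((x ^ y) == (x != y));        // br(xor(x,y))         -> br(x != y)
      assert(((x ^ y) ^ 1) == (x == y));  // br(xor(xor(x,y), 1)) -> br(x == y)
    }
  return 0;
}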
ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign); + ST->getSrcValueOffset(), ST->isVolatile(), + ST->isNonTemporal(), OrigAlign); } // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' @@ -5227,7 +5299,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } break; case MVT::f64: @@ -5239,7 +5311,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->isVolatile(), - ST->getAlignment()); + ST->isNonTemporal(), ST->getAlignment()); } else if (!ST->isVolatile() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for @@ -5253,18 +5325,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - isVolatile, ST->getAlignment()); + isVolatile, isNonTemporal, + ST->getAlignment()); Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SVOffset += 4; Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, St0, St1); } @@ -5286,12 +5361,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (ST->isTruncatingStore()) { ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(),ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } else { ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } // Create token to keep both nodes around. @@ -5325,7 +5401,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); // Otherwise, see if we can simplify the operation with // SimplifyDemandedBits, which only works if the value has a single use. 
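Editor's note: the visitSTORE hunks above (and OptimizeFloatStore in LegalizeDAG further down) lower a 64-bit floating-point store into two 32-bit integer stores when a 64-bit store is not legal: one half goes at the original address, the other at address + 4, with the halves swapped on big-endian targets and the second store's alignment clamped with MinAlign. A standalone illustration of the bit-level split, assuming a little-endian host; function names are illustrative, not LLVM API:

#include <cassert>
#include <cstdint>
#include <cstring>

// Store a double as two 32-bit words, low word first (little-endian layout).
void storeF64AsTwoI32(double V, unsigned char *Ptr) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits));            // bitcast f64 -> i64
  uint32_t Lo = static_cast<uint32_t>(Bits);       // low 32 bits
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32); // high 32 bits
  std::memcpy(Ptr, &Lo, 4);                        // store at offset 0
  std::memcpy(Ptr + 4, &Hi, 4);                    // store at offset 4
}

int main() {
  unsigned char Buf[8];
  storeF64AsTwoI32(3.141592653589793, Buf);
  double Out;
  std::memcpy(&Out, Buf, 8);                       // reload as f64
  assert(Out == 3.141592653589793);                // round-trips exactly
  return 0;
}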
@@ -5358,7 +5435,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } return ReduceLoadOpStoreWidth(N); @@ -5503,7 +5581,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - LN0->isVolatile(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), Align); } return SDValue(); @@ -5883,6 +5961,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getChain(), Addr, 0, 0, LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), @@ -5891,6 +5970,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), LLD->getAlignment()); } @@ -5998,7 +6078,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, CstOffset); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, false, - Alignment); + false, Alignment); } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 35ef5b7..1d76c7c 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -350,6 +350,34 @@ bool FastISel::SelectCall(User *I) { (void)TargetSelectInstruction(cast<Instruction>(I)); return true; } + case Intrinsic::dbg_value: { + // This requires target support, but right now X86 is the only Fast target. + DbgValueInst *DI = cast<DbgValueInst>(I); + const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + Value *V = DI->getValue(); + if (!V) { + // Currently the optimizer can produce this; insert an undef to + // help debugging. Probably the optimizer should not do this. + BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). + addMetadata(DI->getVariable()); + } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). + addMetadata(DI->getVariable()); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). + addMetadata(DI->getVariable()); + } else if (unsigned Reg = lookUpRegForValue(V)) { + BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()). + addMetadata(DI->getVariable()); + } else { + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + // Insert an undef so we can see what we dropped. + BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). 
+ addMetadata(DI->getVariable()); + } + return true; + } case Intrinsic::eh_exception: { EVT VT = TLI.getValueType(I->getType()); switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 02fe85d..625de11 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "instr-emitter" #include "InstrEmitter.h" +#include "SDDbgValue.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -497,6 +498,56 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, assert(isNew && "Node emitted out of order - early"); } +/// EmitDbgValue - Generate any debug info that refers to this Node. Constant +/// dbg_value is not handled here. +void +InstrEmitter::EmitDbgValue(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + SDDbgValue *sd) { + if (!Node->getHasDebugValue()) + return; + if (!sd) + return; + unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap); + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + DebugLoc DL = sd->getDebugLoc(); + MachineInstr *MI; + if (VReg) { + MI = BuildMI(*MF, DL, II).addReg(VReg, RegState::Debug). + addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else { + // Insert an Undef so we can see what we dropped. + MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } + MBB->insert(InsertPos, MI); +} + +/// EmitDbgValue - Generate constant debug info. No SDNode is involved. +void +InstrEmitter::EmitDbgValue(SDDbgValue *sd) { + if (!sd) + return; + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + DebugLoc DL = sd->getDebugLoc(); + MachineInstr *MI; + Value *V = sd->getConst(); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()). + addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + MI = BuildMI(*MF, DL, II).addFPImm(CF).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } else { + // Insert an Undef so we can see what we dropped. + MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()). + addMetadata(sd->getMDPtr()); + } + MBB->insert(InsertPos, MI); +} + /// EmitNode - Generate machine code for a node and needed dependencies. /// void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 91817e4..4fe9f19 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -23,6 +23,7 @@ namespace llvm { class TargetInstrDesc; +class SDDbgValue; class InstrEmitter { MachineFunction *MF; @@ -97,6 +98,16 @@ public: /// MachineInstr. static unsigned CountOperands(SDNode *Node); + /// EmitDbgValue - Generate any debug info that refers to this Node. Constant + /// dbg_value is not handled here. + void EmitDbgValue(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + SDDbgValue* sd); + + + /// EmitDbgValue - Generate a constant DBG_VALUE. No node is involved. + void EmitDbgValue(SDDbgValue* sd); + /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 78e6e4e..f498263 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -377,9 +377,10 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), - 0, VT, false, Alignment); + 0, VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, false, Alignment); + PseudoSourceValue::getConstantPool(), 0, false, false, + Alignment); } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. @@ -402,7 +403,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // FIXME: Does not handle truncating floating point stores! SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val); return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(), - SVOffset, ST->isVolatile(), Alignment); + SVOffset, ST->isVolatile(), ST->isNonTemporal(), + Alignment); } else { // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -418,7 +420,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Perform the original store, only redirected to the stack slot. SDValue Store = DAG.getTruncStore(Chain, dl, - Val, StackPtr, NULL, 0, StoredVT); + Val, StackPtr, NULL, 0, StoredVT, + false, false, 0); SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -426,11 +429,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. - SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0); + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getSrcValue(), SVOffset + Offset, - ST->isVolatile(), + ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // Increment the pointers. Offset += RegBytes; @@ -446,11 +450,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getSrcValue(), SVOffset + Offset, MemVT, ST->isVolatile(), + ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. 
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -474,13 +479,14 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store1, Store2; Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, ST->getSrcValue(), SVOffset, NewStoredVT, - ST->isVolatile(), Alignment); + ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getSrcValue(), SVOffset + IncrementSize, - NewStoredVT, ST->isVolatile(), Alignment); + NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), + Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); } @@ -502,7 +508,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, LD->isVolatile(), - LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); @@ -530,10 +536,11 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load one integer register's worth from the original location. SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0)); + NULL, 0, false, false, 0)); // Increment the pointers. Offset += RegBytes; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); @@ -546,12 +553,13 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - NULL, 0, MemVT)); + NULL, 0, MemVT, false, false, 0)); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], @@ -559,7 +567,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - NULL, 0, LoadedVT); + NULL, 0, LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. 
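Editor's note on ExpandUnalignedStore above: when the value cannot simply be bitconverted to a legal integer and stored, the legalizer does an aligned store into a stack temporary and then copies register-width pieces from the slot to the unaligned destination. The sketch below compresses that strategy into portable C++ (the real code issues explicit loads from the slot followed by stores to the destination; here each piece is a memcpy, and all names are illustrative):

#include <cassert>
#include <cstring>

// "Stack temporary" strategy: do an aligned store into a scratch slot, then
// move it to the unaligned destination in register-width (4-byte) pieces.
void unalignedStoreViaStack(double V, unsigned char *Dst) {
  alignas(8) unsigned char Slot[8];          // aligned stack slot
  std::memcpy(Slot, &V, 8);                  // the original, aligned store
  for (unsigned Off = 0; Off < 8; Off += 4)
    std::memcpy(Dst + Off, Slot + Off, 4);   // one register-width copy at a time
}

int main() {
  unsigned char Buf[9];
  unalignedStoreViaStack(2.5, Buf + 1);      // destination misaligned by one byte
  double Out;
  std::memcpy(&Out, Buf + 1, 8);
  assert(Out == 2.5);
  return 0;
}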
SDValue Ops[] = { Load, TF }; @@ -588,20 +596,22 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Lo, Hi; if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + SVOffset, NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), - SVOffset, NewLoadedVT, LD->isVolatile(), Alignment); + SVOffset, NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); } // aggregate the two parts @@ -643,7 +653,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, // Store the vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); // Truncate or zero extend offset to target pointer type. unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -654,10 +665,12 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); // Store the scalar value. Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, - PseudoSourceValue::getFixedStack(SPFI), 0, EltVT); + PseudoSourceValue::getFixedStack(SPFI), 0, EltVT, + false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); } @@ -702,6 +715,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); DebugLoc dl = ST->getDebugLoc(); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -710,14 +724,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); } else if (CFP->getValueType(0) == MVT::f64) { // If this target supports 64-bit registers, do a single 64-bit store. if (getTypeAction(MVT::i64) == Legal) { Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). zextOrTrunc(64), MVT::i64); return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { // Otherwise, if the target supports 32-bit registers, use 2 32-bit // stores. 
If the target supports neither 32- nor 64-bits, this @@ -728,11 +742,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, Alignment); Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(4)); Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, - isVolatile, MinAlign(Alignment, 4U)); + isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1108,7 +1122,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1)); Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; @@ -1125,6 +1140,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -1150,7 +1166,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, - NVT, isVolatile, Alignment); + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1187,7 +1203,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - Alignment); + isNonTemporal, Alignment); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1195,7 +1211,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1215,7 +1231,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, - Alignment); + isNonTemporal, Alignment); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1224,7 +1240,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, - ExtraVT, isVolatile, + ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the @@ -1284,7 +1300,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); Result = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Load); Tmp1 = LegalizeOp(Result); // Relegalize new nodes. 
@@ -1297,7 +1314,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), SrcVT, - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1325,6 +1343,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { int SVOffset = ST->getSrcValueOffset(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -1361,7 +1380,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, isVolatile, - Alignment); + isNonTemporal, Alignment); break; } break; @@ -1379,7 +1398,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, NVT, isVolatile, Alignment); + SVOffset, NVT, isVolatile, isNonTemporal, + Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1399,7 +1419,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset, RoundVT, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1409,6 +1429,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, MinAlign(Alignment, IncrementSize)); } else { // Big endian - avoid unaligned stores. @@ -1417,7 +1438,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), - SVOffset, RoundVT, isVolatile, Alignment); + SVOffset, RoundVT, isVolatile, isNonTemporal, + Alignment); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1425,6 +1447,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { DAG.getIntPtrConstant(IncrementSize)); Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, + isNonTemporal, MinAlign(Alignment, IncrementSize)); } @@ -1457,7 +1480,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { assert(isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); + SVOffset, isVolatile, isNonTemporal, + Alignment); break; } } @@ -1484,7 +1508,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { DebugLoc dl = Op.getDebugLoc(); // Store the value to a temporary stack slot, then LOAD the returned part. 
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); // Add the offset to the index. unsigned EltSize = @@ -1500,10 +1525,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); if (Op.getValueType().isVector()) - return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, + false, false, 0); else return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - NULL, 0, Vec.getValueType().getVectorElementType()); + NULL, 0, Vec.getValueType().getVectorElementType(), + false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1512,7 +1539,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // the result as a vector. // Create the stack frame object. EVT VT = Node->getValueType(0); - EVT OpVT = Node->getOperand(0).getValueType(); EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Node->getDebugLoc(); SDValue FIPtr = DAG.CreateStackTemporary(VT); @@ -1532,13 +1558,16 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); - // If EltVT smaller than OpVT, only store the bits necessary. - if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) { + // If the destination vector element type is narrower than the source + // element type, only store the bits necessary. + if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset, EltVT)); + Node->getOperand(i), Idx, SV, Offset, + EltVT, false, false, 0)); } else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, - Node->getOperand(i), Idx, SV, Offset)); + Node->getOperand(i), Idx, SV, Offset, + false, false, 0)); } SDValue StoreChain; @@ -1549,7 +1578,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. 
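Editor's note: ExpandExtractFromVectorThroughStack above spills the whole vector into a stack temporary, advances the pointer by index times element size, and loads the element back; ExpandVectorBuildThroughStack uses the same slot in the opposite direction. A standalone sketch of just the address arithmetic, in plain C++ with an array standing in for the stack slot (names are illustrative):

#include <cassert>
#include <cstdint>
#include <cstring>

// Extract element Idx from a "vector" by spilling it to a stack slot and
// loading from Base + Idx * sizeof(element).
int32_t extractThroughStack(const int32_t (&Vec)[4], unsigned Idx) {
  unsigned char Slot[sizeof(Vec)];                  // stack temporary
  std::memcpy(Slot, Vec, sizeof(Vec));              // store the whole vector
  int32_t Elt;
  std::memcpy(&Elt, Slot + Idx * sizeof(int32_t), sizeof(Elt)); // indexed load
  return Elt;
}

int main() {
  int32_t V[4] = {10, 20, 30, 40};
  assert(extractThroughStack(V, 2) == 30);
  return 0;
}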
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1572,12 +1601,14 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType()); SDValue StorePtr = StackPtr, LoadPtr = StackPtr; SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0); + DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0, + false, false, 0); if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian()) LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), LoadPtr, DAG.getIntPtrConstant(4)); SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(), - Ch, LoadPtr, NULL, 0, MVT::i32); + Ch, LoadPtr, NULL, 0, MVT::i32, + false, false, 0); } SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()), @@ -1701,20 +1732,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, if (SrcSize > SlotSize) Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, SlotVT, false, SrcAlign); + SV, 0, SlotVT, false, false, SrcAlign); else { assert(SrcSize == SlotSize && "Invalid store"); Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, - SV, 0, false, SrcAlign); + SV, 0, false, false, SrcAlign); } // Result is a load from the stack slot. if (SlotSize == DestSize) - return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, DestAlign); + return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false, + DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT, - false, DestAlign); + false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1729,9 +1761,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, PseudoSourceValue::getFixedStack(SPFI), 0, - Node->getValueType(0).getVectorElementType()); + Node->getValueType(0).getVectorElementType(), + false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - PseudoSourceValue::getFixedStack(SPFI), 0); + PseudoSourceValue::getFixedStack(SPFI), 0, + false, false, 0); } @@ -1805,7 +1839,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - false, Alignment); + false, false, Alignment); } if (!MoreThanTwoValues) { @@ -1865,8 +1899,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, - Node->getDebugLoc(), DAG.GetOrdering(Node)); + Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. 
This will advance // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that @@ -1943,13 +1976,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // store the lo of the constructed double - based on integer input SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, - Op0Mapped, Lo, NULL, 0); + Op0Mapped, Lo, NULL, 0, + false, false, 0); // initial hi portion of constructed double SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32); // store the hi of the constructed double - biased exponent - SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0); + SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0, + false, false, 0); // load the constructed double - SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0, + false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -1972,6 +2008,31 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. + // TODO: Generalize this for use with other types. + if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { + SDValue TwoP52 = + DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64); + SDValue TwoP84PlusTwoP52 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64); + SDValue TwoP84 = + DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64); + + SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); + SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, + DAG.getConstant(32, MVT::i64)); + SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); + SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); + SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); + return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); + } + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()), @@ -2004,13 +2065,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - false, Alignment); + false, false, Alignment); else { FudgeInReg = LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, - MVT::f32, false, Alignment)); + MVT::f32, false, false, Alignment)); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2271,7 +2332,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("abort", TLI.getPointerTy()), - Args, DAG, dl, DAG.GetOrdering(Node)); + Args, DAG, dl); Results.push_back(CallResult.second); break; } @@ -2350,16 +2411,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = 
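Editor's note on the new ExpandLegalINT_TO_FP path above: it converts an unsigned i64 to f64 by OR-ing the low 32 bits into the mantissa of 2^52 and the high 32 bits into the mantissa of 2^84, then subtracting the combined bias and adding the halves, so only the final addition rounds (the __floatundidf algorithm from compiler-rt named in the comment). A standalone C++ version of the same bit trick, assuming IEEE-754 doubles on the host; the function name is illustrative:

#include <cassert>
#include <cstdint>
#include <cstring>

double uint64ToDouble(uint64_t X) {
  // Bit patterns of the doubles 2^52, 2^84, and 2^84 + 2^52.
  const uint64_t TwoP52 = 0x4330000000000000ULL;
  const uint64_t TwoP84 = 0x4530000000000000ULL;
  const uint64_t TwoP84PlusTwoP52 = 0x4530000000100000ULL;

  uint64_t LoBits = (X & 0xFFFFFFFFULL) | TwoP52;  // double 2^52 + lo   (exact)
  uint64_t HiBits = (X >> 32) | TwoP84;            // double 2^84 + hi*2^32 (exact)

  double Lo, Hi, Bias;
  std::memcpy(&Lo, &LoBits, 8);
  std::memcpy(&Hi, &HiBits, 8);
  std::memcpy(&Bias, &TwoP84PlusTwoP52, 8);

  // (Hi - Bias) is exact; the final add is the only rounding step, so the
  // result is correctly rounded in every rounding mode.
  return (Hi - Bias) + Lo;
}

int main() {
  assert(uint64ToDouble(0) == 0.0);
  assert(uint64ToDouble(1) == 1.0);
  assert(uint64ToDouble(0xFFFFFFFFULL) == 4294967295.0);
  assert(uint64ToDouble(1ULL << 53) == 9007199254740992.0);
  return 0;
}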
Node->getOperand(1); - SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0); + SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, + false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -2369,8 +2433,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), VS, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0); + Node->getOperand(2), VS, 0, false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0, + false, false, 0); Results.push_back(Tmp1); break; } @@ -2827,7 +2892,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0, MemVT); + PseudoSourceValue::getJumpTable(), 0, MemVT, + false, false, 0); Addr = LD; if (TLI.getTargetMachine().getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 4f0fce7..35a7c7c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -444,7 +444,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), NVT, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), NVT, - L->isVolatile(), L->getAlignment()); + L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -456,8 +456,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), - L->getMemoryVT(), - L->isVolatile(), L->getAlignment()); + L->getMemoryVT(), L->isVolatile(), + L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -755,7 +755,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), ST->getSrcValue(), ST->getSrcValueOffset(), - ST->isVolatile(), ST->getAlignment()); + ST->isVolatile(), ST->isNonTemporal(), + ST->getAlignment()); } @@ -1073,8 +1074,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), - LD->getMemoryVT(), - LD->isVolatile(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); // Remember the chain. Chain = Hi.getValue(1); @@ -1382,6 +1383,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, ST->getSrcValue(), ST->getSrcValueOffset(), - ST->getMemoryVT(), - ST->isVolatile(), ST->getAlignment()); + ST->getMemoryVT(), ST->isVolatile(), + ST->isNonTemporal(), ST->getAlignment()); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9932cf4..81f28ad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -359,7 +359,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT(), N->isVolatile(), - N->getAlignment()); + N->isNonTemporal(), N->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -873,6 +873,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. @@ -880,7 +881,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ // Truncate the value and store the result. 
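Editor's note: most of the mechanical churn in these legalizer hunks threads the new isNonTemporal flag through every getLoad / getExtLoad / getStore / getTruncStore call so the hint survives combining and type legalization instead of being silently dropped. A non-temporal access is one the program does not expect to reuse soon, letting the target emit cache-bypassing streaming stores; at the source level this typically originates from intrinsics such as SSE2's _mm_stream_si32. The sketch below is an x86-specific usage illustration only, unrelated to the legalizer code itself:

#include <emmintrin.h>   // SSE2: _mm_stream_si32 (non-temporal 32-bit store)

// Fill a buffer with non-temporal stores so the written data does not
// displace useful lines from the cache.
void streamFill(int *Dst, int Value, int Count) {
  for (int i = 0; i < Count; ++i)
    _mm_stream_si32(Dst + i, Value);   // movnti: cache-bypassing store
  _mm_sfence();                        // order the streaming stores
}

int main() {
  static int Buf[1024];
  streamFill(Buf, 42, 1024);
  return Buf[0] == 42 ? 0 : 1;
}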
return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(), SVOffset, N->getMemoryVT(), - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { @@ -1079,8 +1080,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Amt = N->getOperand(1); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT ShTy = Amt.getValueType(); - unsigned ShBits = ShTy.getSizeInBits(); - unsigned NVTBits = NVT.getSizeInBits(); + unsigned ShBits = ShTy.getScalarType().getSizeInBits(); + unsigned NVTBits = NVT.getScalarType().getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); DebugLoc dl = N->getDebugLoc(); @@ -1500,6 +1501,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1508,7 +1510,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, - MemVT, isVolatile, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. Ch = Lo.getValue(1); @@ -1530,7 +1532,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1542,7 +1544,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. @@ -1560,7 +1563,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1569,7 +1572,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. 
@@ -2212,6 +2216,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { int SVOffset = N->getSrcValueOffset(); unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); DebugLoc dl = N->getDebugLoc(); SDValue Lo, Hi; @@ -2220,13 +2225,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - N->getMemoryVT(), isVolatile, Alignment); + N->getMemoryVT(), isVolatile, isNonTemporal, + Alignment); } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2238,7 +2244,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } else { // Big-endian - high bits are at low addresses. Favor aligned stores at @@ -2264,7 +2271,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), - SVOffset, HiVT, isVolatile, Alignment); + SVOffset, HiVT, isVolatile, isNonTemporal, + Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2273,7 +2281,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } } @@ -2341,7 +2350,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // FIXME: Avoid the extend by constructing the right constant pool? SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, NULL, 0, MVT::f32, - false, Alignment); + false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 37f36a3..f3e7ca4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -871,9 +871,10 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, // the source and destination types. SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. 
- return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0); + return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided @@ -1033,8 +1034,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl, - DAG.GetOrdering(DAG.getEntryNode().getNode())); + Callee, Args, DAG, dl); return CallInfo.first; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index a1b6ced..5e83b4b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -122,10 +122,11 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, const Value *SV = PseudoSourceValue::getFixedStack(SPFI); // Emit a store to the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0, + false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -134,7 +135,7 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo, // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false, - MinAlign(Alignment, IncrementSize)); + false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -205,11 +206,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, int SVOffset = LD->getSrcValueOffset(); unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -217,7 +219,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset+IncrementSize, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the // other one. 
@@ -383,6 +386,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { int SVOffset = St->getSrcValueOffset(); unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); + bool isNonTemporal = St->isNonTemporal(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -394,14 +398,15 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset, - isVolatile, Alignment); + isVolatile, isNonTemporal, Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(), SVOffset + IncrementSize, - isVolatile, MinAlign(Alignment, IncrementSize)); + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index bf95bb5..8363c3a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -172,7 +172,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getOriginalAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -366,11 +367,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->getAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getAlignment()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), - N->isVolatile(), N->getOriginalAlignment()); + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment()); } @@ -696,17 +699,20 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0, + false, false, 0); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); unsigned Alignment = TLI.getTargetData()->getPrefTypeAlignment(VecVT.getTypeForEVT(*DAG.getContext())); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT, + false, false, 0); // Load the Lo part from the stack slot. - Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0); + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other part. 
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -715,7 +721,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false, - MinAlign(Alignment, IncrementSize)); + false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -743,19 +749,20 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, EVT MemoryVT = LD->getMemoryVT(); unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, - SV, SVOffset, LoMemVT, isVolatile, Alignment); + SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, - SV, SVOffset, HiMemVT, isVolatile, Alignment); + SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the // other one. @@ -1086,12 +1093,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue StackPtr = DAG.CreateStackTemporary(VecVT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); const Value *SV = PseudoSourceValue::getFixedStack(SPFI); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0, + false, false, 0); // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - SV, 0, EltVT); + SV, 0, EltVT, false, false, 0); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1106,6 +1114,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); + bool isNT = N->isNonTemporal(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1116,10 +1125,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - LoMemVT, isVol, Alignment); + LoMemVT, isVol, isNT, Alignment); else Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset, - isVol, Alignment); + isVol, isNT, Alignment); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1128,10 +1137,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, - HiMemVT, isVol, Alignment); + HiMemVT, isVol, isNT, Alignment); else Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset, - isVol, Alignment); + isVol, isNT, Alignment); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -1242,10 +1251,96 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. 
+ unsigned Opcode = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + } else if (NumElts == 1) { + // No legal vector version so unroll the vector operation and then widen. + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + } else { + // Since the operation can trap, apply operation on the original vector. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + unsigned Idx = 0; // Current Idx into input vectors. + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getIntPtrConstant(Idx)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + Idx += NumElts; + CurNumElts -= NumElts; + } + EVT PrevVecVT = VT; + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + // Since we are using concat vector, build a vector from the scalar ops. + SDValue VecOp = DAG.getUNDEF(PrevVecVT); + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp1, DAG.getIntPtrConstant(Idx)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getIntPtrConstant(Idx)); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp, + DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2), + DAG.getIntPtrConstant(i)); + } + CurNumElts = 0; + ConcatOps[ConcatEnd++] = VecOp; + } + } + + // Check to see if we have a single operation with the widen type. 
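+    // A minimal illustration of the strategy above (assuming <2 x i32> is a
+    // legal type on the target): widening a trapping op such as a <3 x i32>
+    // sdiv to <4 x i32> must not evaluate the undefined fourth lane, so the
+    // loop computes lanes 0-1 with one <2 x i32> divide via EXTRACT_SUBVECTOR
+    // and lane 2 as a scalar divide inserted into an otherwise-undef
+    // <2 x i32>, leaving two pieces in ConcatOps.  What follows stitches
+    // those pieces back together into the widened result.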
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // Rebuild vector to one with the widen type + Idx = ConcatEnd - 1; + while (Idx != 0) { + VT = ConcatOps[Idx--].getValueType(); + while (Idx != 0 && ConcatOps[Idx].getValueType() == VT) + --Idx; + if (Idx != 0) { + VT = ConcatOps[Idx].getValueType(); + ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + &ConcatOps[Idx+1], ConcatEnd - Idx - 1); + ConcatEnd = Idx + 2; + } + } + + unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(VT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + } } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -2042,6 +2137,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); const Value *SV = LD->getSrcValue(); int LdWidth = LdVT.getSizeInBits(); @@ -2052,7 +2148,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset, - isVolatile, Align); + isVolatile, isNonTemporal, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2099,7 +2195,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset+Offset, isVolatile, - MinAlign(Align, Increment)); + isNonTemporal, MinAlign(Align, Increment)); LdChain.push_back(LdOp.getValue(1)); LdOps.push_back(LdOp); @@ -2173,6 +2269,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, int SVOffset = LD->getSrcValueOffset(); unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); const Value *SV = LD->getSrcValue(); EVT EltVT = WidenVT.getVectorElementType(); @@ -2184,14 +2281,15 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, SmallVector<SDValue, 16> Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, - LdEltVT, isVolatile, Align); + LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, - SVOffset + Offset, LdEltVT, isVolatile, Align); + SVOffset + Offset, LdEltVT, isVolatile, + isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); } @@ -2215,6 +2313,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = ST->getDebugLoc(); @@ -2240,6 +2339,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, 
DAG.getIntPtrConstant(Idx)); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset + Offset, isVolatile, + isNonTemporal, MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; @@ -2258,8 +2358,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getIntPtrConstant(Idx++)); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV, - SVOffset + Offset, isVolatile, - MinAlign(Align, Offset))); + SVOffset + Offset, isVolatile, + isNonTemporal, MinAlign(Align, Offset))); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -2282,6 +2382,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, int SVOffset = ST->getSrcValueOffset(); unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); SDValue ValOp = GetWidenedVector(ST->getValue()); DebugLoc dl = ST->getDebugLoc(); @@ -2304,7 +2405,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, DAG.getIntPtrConstant(0)); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV, SVOffset, StEltVT, - isVolatile, Align)); + isVolatile, isNonTemporal, Align)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), @@ -2313,7 +2414,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, DAG.getIntPtrConstant(0)); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV, SVOffset + Offset, StEltVT, - isVolatile, MinAlign(Align, Offset))); + isVolatile, isNonTemporal, + MinAlign(Align, Offset))); } } diff --git a/lib/CodeGen/SelectionDAG/SDDbgValue.h b/lib/CodeGen/SelectionDAG/SDDbgValue.h new file mode 100644 index 0000000..9e15fc9 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/SDDbgValue.h @@ -0,0 +1,67 @@ +//===-- llvm/CodeGen/SDDbgValue.h - SD dbg_value handling--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDDbgValue class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SDDBGVALUE_H +#define LLVM_CODEGEN_SDDBGVALUE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DebugLoc.h" + +namespace llvm { + +class MDNode; +class SDNode; +class Value; + +/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// Either Const or Node is nonzero, but not both. +/// We do not use SDValue here to avoid including its header. + +class SDDbgValue { + SDNode *Node; // valid for non-constants + unsigned ResNo; // valid for non-constants + Value *Const; // valid for constants + MDNode *mdPtr; + uint64_t Offset; + DebugLoc DL; +public: + // Constructor for non-constants. + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl) : + Node(N), ResNo(R), Const(0), mdPtr(mdP), Offset(off), DL(dl) {} + + // Constructor for constants. + SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl) : Node(0), + ResNo(0), Const(C), mdPtr(mdP), Offset(off), DL(dl) {} + + // Returns the MDNode pointer. 
+ MDNode *getMDPtr() { return mdPtr; } + + // Returns the SDNode* (valid for non-constants only). + SDNode *getSDNode() { assert (!Const); return Node; } + + // Returns the ResNo (valid for non-constants only). + unsigned getResNo() { assert (!Const); return ResNo; } + + // Returns the Value* for a constant (invalid for non-constants). + Value *getConst() { assert (!Node); return Const; } + + // Returns the offset. + uint64_t getOffset() { return Offset; } + + // Returns the DebugLoc. + DebugLoc getDebugLoc() { return DL; } +}; + +} // end llvm namespace + +#endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b51c61b..06e7b8c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -218,8 +218,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Check to see if the scheduler cares about latencies. bool UnitLatencies = ForceUnitLatencies(); - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { + // Add all nodes in depth first order. + SmallVector<SDNode*, 64> Worklist; + SmallPtrSet<SDNode*, 64> Visited; + Worklist.push_back(DAG->getRoot().getNode()); + Visited.insert(DAG->getRoot().getNode()); + + while (!Worklist.empty()) { + SDNode *NI = Worklist.pop_back_val(); + + // Add all operands to the worklist unless they've already been added. + for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) + if (Visited.insert(NI->getOperand(i).getNode())) + Worklist.push_back(NI->getOperand(i).getNode()); + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 6122a2a..746d4e2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -468,18 +468,20 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { } /// encodeMemSDNodeFlags - Generic routine for computing a value for use in -/// the CSE map that carries volatility, indexing mode, and +/// the CSE map that carries volatility, temporalness, indexing mode, and /// extension/truncation information. /// static inline unsigned -encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) { +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, + bool isNonTemporal) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && "AM may not require more than 3 bits!"); return ConvType | (AM << 2) | - (isVolatile << 5); + (isVolatile << 5) | + (isNonTemporal << 6); } //===----------------------------------------------------------------------===// @@ -829,6 +831,7 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); + delete Ordering; Ordering = new SDNodeOrdering(); } @@ -859,14 +862,14 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); return getNode(ISD::XOR, DL, VT, Val, NegOne); } SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { - EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); assert((EltVT.getSizeInBits() >= 64 || (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && "getConstant with a uint64_t value that doesn't fit in the type!"); @@ -880,7 +883,7 @@ SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { assert(VT.isInteger() && "Cannot create FP integer constant!"); - EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); assert(Val.getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -923,8 +926,7 @@ SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); - EVT EltVT = - VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); // Do the map lookup using the actual bit pattern for the floating point // value, so that we don't have problems with 0.0 comparing equal to -0.0, and @@ -958,8 +960,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ } SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { - EVT EltVT = - VT.isVector() ? VT.getVectorElementType() : VT; + EVT EltVT = VT.getScalarType(); if (EltVT==MVT::f32) return getConstantFP(APFloat((float)Val), VT, isTarget); else @@ -1344,7 +1345,7 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, } SDValue SelectionDAG::getSrcValue(const Value *V) { - assert((!V || isa<PointerType>(V->getType())) && + assert((!V || V->getType()->isPointerTy()) && "SrcValue is not a pointer?"); FoldingSetNodeID ID; @@ -2232,6 +2233,29 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { return false; } +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { + // If the value is a constant, we can obviously see if it is a zero or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->isZero(); + + // TODO: Recognize more cases here. + + return false; +} + +bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { + // Check the obvious case. + if (A == B) return true; + + // For for negative and positive zero. + if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) + if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) + if (CA->isZero() && CB->isZero()) return true; + + // Otherwise they may not be equal. + return false; +} + bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); if (!GA) return false; @@ -3080,8 +3104,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, DebugLoc dl) { - unsigned NumBits = VT.isVector() ? 
- VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits(); + unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { APInt Val = APInt(NumBits, C->getZExtValue() & 255); unsigned Shift = 8; @@ -3185,7 +3208,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, bool isSrcConst = isa<ConstantSDNode>(Src); EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT); - if (VT != MVT::iAny) { + if (VT != MVT::Other) { const Type *Ty = VT.getTypeForEVT(*DAG.getContext()); unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); // If source is a string constant, this will require an unaligned load. @@ -3193,14 +3216,14 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, if (Dst.getOpcode() != ISD::FrameIndex) { // Can't change destination alignment. It requires a unaligned store. if (AllowUnalign) - VT = MVT::iAny; + VT = MVT::Other; } else { int FI = cast<FrameIndexSDNode>(Dst)->getIndex(); MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (MFI->isFixedObjectIndex(FI)) { // Can't change destination alignment. It requires a unaligned store. if (AllowUnalign) - VT = MVT::iAny; + VT = MVT::Other; } else { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI) < NewAlign) @@ -3211,7 +3234,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps, } } - if (VT == MVT::iAny) { + if (VT == MVT::Other) { if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) { VT = MVT::i64; } else { @@ -3299,7 +3322,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, DstAlign); } else { // The type might not be legal for the target. 
This should only happen // if the type is smaller than a legal type, as on PPC, so the right @@ -3310,10 +3333,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, VT, false, Align); + SrcSV, SrcSVOff + SrcOff, VT, false, false, Align); Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, VT, false, DstAlign); + getMemBasePlusOffset(Dst, DstOff, DAG), + DstSV, DstSVOff + DstOff, VT, false, false, + DstAlign); } OutChains.push_back(Store); SrcOff += VTSize; @@ -3358,7 +3382,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), - SrcSV, SrcSVOff + SrcOff, false, Align); + SrcSV, SrcSVOff + SrcOff, false, false, Align); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -3373,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Store = DAG.getStore(Chain, dl, LoadValues[i], getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff, false, DstAlign); + DstSV, DstSVOff + DstOff, false, false, DstAlign); OutChains.push_back(Store); DstOff += VTSize; } @@ -3408,7 +3432,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SDValue Value = getMemsetValue(Src, VT, DAG, dl); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); OutChains.push_back(Store); DstOff += VTSize; } @@ -3472,7 +3496,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), TLI.getPointerTy()), - Args, *this, dl, GetOrdering(Chain.getNode())); + Args, *this, dl); return CallResult.second; } @@ -3521,7 +3545,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), TLI.getPointerTy()), - Args, *this, dl, GetOrdering(Chain.getNode())); + Args, *this, dl); return CallResult.second; } @@ -3580,7 +3604,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, /*isReturnValueUsed=*/false, getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), TLI.getPointerTy()), - Args, *this, dl, GetOrdering(Chain.getNode())); + Args, *this, dl); return CallResult.second; } @@ -3788,7 +3812,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType, EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(VT); @@ -3802,6 +3827,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, unsigned Flags = MachineMemOperand::MOLoad; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, MemVT.getStoreSize(), Alignment); @@ -3840,7 +3867,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); - 
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile())); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), + MMO->isNonTemporal())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<LoadSDNode>(E)->refineAlignment(MMO); @@ -3856,20 +3884,22 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, - SV, SVOffset, VT, isVolatile, Alignment); + SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, - SV, SVOffset, MemVT, isVolatile, Alignment); + SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); } SDValue @@ -3881,12 +3911,13 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), LD->getChain(), Base, Offset, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(Val.getValueType()); @@ -3900,6 +3931,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, Val.getValueType().getStoreSize(), Alignment); @@ -3916,7 +3949,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile())); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -3932,7 +3966,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr, const Value *SV, int SVOffset, EVT SVT, - bool isVolatile, unsigned Alignment) { + bool isVolatile, bool isNonTemporal, + unsigned Alignment) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(SVT); @@ -3946,6 +3981,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, unsigned Flags = MachineMemOperand::MOStore; if (isVolatile) Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; 
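  // With the extra bit, the CSE key produced by encodeMemSDNodeFlags packs
  //   ConvType | (AM << 2) | (isVolatile << 5) | (isNonTemporal << 6),
  // so, as a worked example (taking ISD::UNINDEXED to encode as 0), a
  // volatile, non-temporal, unindexed truncating store hashes as
  //   1 | (0 << 2) | (1 << 5) | (1 << 6) = 0x61,
  // and two stores that differ only in the non-temporal flag no longer CSE
  // to the same node.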
MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment); @@ -3976,7 +4013,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); - ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile())); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast<StoreSDNode>(E)->refineAlignment(MMO); @@ -4535,91 +4573,13 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDVTList VTs, const SDValue *Ops, unsigned NumOps) { - return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT) { - SDVTList VTs = getVTList(VT); - return MorphNodeTo(N, Opc, VTs, 0, 0); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT, SDValue Op1) { - SDVTList VTs = getVTList(VT); - SDValue Ops[] = { Op1 }; - return MorphNodeTo(N, Opc, VTs, Ops, 1); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT, SDValue Op1, - SDValue Op2) { - SDVTList VTs = getVTList(VT); - SDValue Ops[] = { Op1, Op2 }; - return MorphNodeTo(N, Opc, VTs, Ops, 2); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT, SDValue Op1, - SDValue Op2, SDValue Op3) { - SDVTList VTs = getVTList(VT); - SDValue Ops[] = { Op1, Op2, Op3 }; - return MorphNodeTo(N, Opc, VTs, Ops, 3); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT, const SDValue *Ops, - unsigned NumOps) { - SDVTList VTs = getVTList(VT); - return MorphNodeTo(N, Opc, VTs, Ops, NumOps); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, const SDValue *Ops, - unsigned NumOps) { - SDVTList VTs = getVTList(VT1, VT2); - return MorphNodeTo(N, Opc, VTs, Ops, NumOps); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2) { - SDVTList VTs = getVTList(VT1, VT2); - return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { - SDVTList VTs = getVTList(VT1, VT2, VT3); - return MorphNodeTo(N, Opc, VTs, Ops, NumOps); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, - SDValue Op1) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1 }; - return MorphNodeTo(N, Opc, VTs, Ops, 1); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, - SDValue Op1, SDValue Op2) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1, Op2 }; - return MorphNodeTo(N, Opc, VTs, Ops, 2); -} - -SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - EVT VT1, EVT VT2, - SDValue Op1, SDValue Op2, - SDValue Op3) { - SDVTList VTs = getVTList(VT1, VT2); - SDValue Ops[] = { Op1, Op2, Op3 }; - return MorphNodeTo(N, Opc, VTs, Ops, 3); + N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); + // Reset the NodeID to -1. + N->setNodeId(-1); + return N; } -/// MorphNodeTo - These *mutate* the specified node to have the specified +/// MorphNodeTo - This *mutates* the specified node to have the specified /// return type, opcode, and operands. /// /// Note that MorphNodeTo returns the resultant node. 
If there is already a @@ -4695,12 +4655,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // Delete any nodes that are still dead after adding the uses for the // new operands. - SmallVector<SDNode *, 16> DeadNodes; - for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), - E = DeadNodeSet.end(); I != E; ++I) - if ((*I)->use_empty()) - DeadNodes.push_back(*I); - RemoveDeadNodes(DeadNodes); + if (!DeadNodeSet.empty()) { + SmallVector<SDNode *, 16> DeadNodes; + for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), + E = DeadNodeSet.end(); I != E; ++I) + if ((*I)->use_empty()) + DeadNodes.push_back(*I); + RemoveDeadNodes(DeadNodes); + } if (IP) CSEMap.InsertNode(N, IP); // Memoize the new node. @@ -4907,6 +4869,43 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, return NULL; } +namespace { + +/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node +/// pointed to by a use iterator is deleted, increment the use iterator +/// so that it doesn't dangle. +/// +/// This class also manages a "downlink" DAGUpdateListener, to forward +/// messages to ReplaceAllUsesWith's callers. +/// +class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { + SelectionDAG::DAGUpdateListener *DownLink; + SDNode::use_iterator &UI; + SDNode::use_iterator &UE; + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + // Increment the iterator as needed. + while (UI != UE && N == *UI) + ++UI; + + // Then forward the message. + if (DownLink) DownLink->NodeDeleted(N, E); + } + + virtual void NodeUpdated(SDNode *N) { + // Just forward the message. + if (DownLink) DownLink->NodeUpdated(N); + } + +public: + RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl, + SDNode::use_iterator &ui, + SDNode::use_iterator &ue) + : DownLink(dl), UI(ui), UE(ue) {} +}; + +} + /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. /// @@ -4927,6 +4926,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // is replaced by To, we don't want to replace of all its users with To // too. See PR3018 for more info. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -4945,7 +4945,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -4971,6 +4971,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -4989,7 +4990,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -5007,6 +5008,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. 
SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; @@ -5026,7 +5028,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -5048,6 +5050,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // the ReplaceAllUsesWith above. SDNode::use_iterator UI = From.getNode()->use_begin(), UE = From.getNode()->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); while (UI != UE) { SDNode *User = *UI; bool UserRemovedFromCSEMaps = false; @@ -5083,7 +5086,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. - AddModifiedNodeToCSEMaps(User, UpdateListener); + AddModifiedNodeToCSEMaps(User, &Listener); } } @@ -5280,8 +5283,11 @@ GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(isNonTemporal() == MMO->isNonTemporal() && + "Non-temporal encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } @@ -5290,7 +5296,8 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { - SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile()); + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } @@ -5459,15 +5466,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) return TII->get(getMachineOpcode()).getName(); - return "<<Unknown Machine Node>>"; + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; } if (G) { const TargetLowering &TLI = G->getTargetLoweringInfo(); const char *Name = TLI.getTargetNodeName(getOpcode()); if (Name) return Name; - return "<<Unknown Target Node>>"; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; } - return "<<Unknown Node>>"; + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; #ifndef NDEBUG case ISD::DELETED_NODE: @@ -5904,6 +5911,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (G) if (unsigned Order = G->GetOrdering(this)) OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; } void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { @@ -6292,31 +6302,37 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } +#ifdef XDEBUG static void checkForCyclesHelper(const SDNode *N, - std::set<const SDNode *> &visited) { - if 
(visited.find(N) != visited.end()) { + SmallPtrSet<const SDNode*, 32> &Visited, + SmallPtrSet<const SDNode*, 32> &Checked) { + // If this node has already been checked, don't check it again. + if (Checked.count(N)) + return; + + // If a node has already been visited on this depth-first walk, reject it as + // a cycle. + if (!Visited.insert(N)) { dbgs() << "Offending node:\n"; N->dumprFull(); - assert(0 && "Detected cycle in SelectionDAG"); + errs() << "Detected cycle in SelectionDAG\n"; + abort(); } - - std::set<const SDNode*>::iterator i; - bool inserted; - - tie(i, inserted) = visited.insert(N); - assert(inserted && "Missed cycle"); - - for(unsigned i = 0; i < N->getNumOperands(); ++i) { - checkForCyclesHelper(N->getOperand(i).getNode(), visited); - } - visited.erase(i); + + for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); + + Checked.insert(N); + Visited.erase(N); } +#endif void llvm::checkForCycles(const llvm::SDNode *N) { #ifdef XDEBUG assert(N && "Checking nonexistant SDNode"); - std::set<const SDNode *> visited; - checkForCyclesHelper(N, visited); + SmallPtrSet<const SDNode*, 32> visited; + SmallPtrSet<const SDNode*, 32> checked; + checkForCyclesHelper(N, visited, checked); #endif } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index de17f90..05be9a1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -155,7 +155,7 @@ namespace { /// this value and returns the result as a ValueVTs value. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, unsigned Order, + SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, SDValue &Chain, SDValue *Flag) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the @@ -163,14 +163,14 @@ namespace { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - unsigned Order, SDValue &Chain, SDValue *Flag) const; + SDValue &Chain, SDValue *Flag) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index /// (if applicable), and includes the number of values added into it. void AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, unsigned Order, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const; }; } @@ -180,7 +180,7 @@ namespace { /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). 
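/// For example (a sketch, not taken from this patch): an i64 value returned
/// in two i32 registers on a 32-bit target arrives here as Parts[0] and
/// Parts[1] with PartVT = i32 and ValueVT = i64 and is reassembled into a
/// single i64 node; a promoted i8 arriving in an i32 part can pass
/// AssertOp = ISD::AssertZext so the extra 24 bits are known to be zero
/// before the value is truncated back to i8.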
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, +static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, const SDValue *Parts, unsigned NumParts, EVT PartVT, EVT ValueVT, ISD::NodeType AssertOp = ISD::DELETED_NODE) { @@ -205,9 +205,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); if (RoundParts > 2) { - Lo = getCopyFromParts(DAG, dl, Order, Parts, RoundParts / 2, + Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2, PartVT, HalfVT); - Hi = getCopyFromParts(DAG, dl, Order, Parts + RoundParts / 2, + Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2, RoundParts / 2, PartVT, HalfVT); } else { Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]); @@ -223,7 +223,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, // Assemble the trailing non-power-of-2 part. unsigned OddParts = NumParts - RoundParts; EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); - Hi = getCopyFromParts(DAG, dl, Order, + Hi = getCopyFromParts(DAG, dl, Parts + RoundParts, OddParts, PartVT, OddVT); // Combine the round and odd parts. @@ -259,7 +259,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, // If the register was not expanded, truncate or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i], 1, + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1, PartVT, IntermediateVT); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate @@ -268,7 +268,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i * Factor], Factor, + Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor, PartVT, IntermediateVT); } @@ -292,7 +292,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && !PartVT.isVector() && "Unexpected split"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); - Val = getCopyFromParts(DAG, dl, Order, Parts, NumParts, PartVT, IntVT); + Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT); } } @@ -349,7 +349,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. 
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, +static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, EVT PartVT, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { @@ -417,7 +417,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val, DAG.getConstant(RoundBits, TLI.getPointerTy())); - getCopyToParts(DAG, dl, Order, OddVal, Parts + RoundParts, + getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT); if (TLI.isBigEndian()) @@ -514,7 +514,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, // If the register was not expanded, promote or copy the value, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) - getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i], 1, PartVT); + getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT); } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. @@ -522,7 +522,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order, "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) - getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i*Factor], Factor, PartVT); + getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT); } } @@ -680,7 +680,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { getCurDebugLoc()); } - if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) { + if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && "Unknown struct or array constant!"); @@ -747,8 +747,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), - SDNodeOrder, Chain, NULL); + return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); } /// Get the EVTs and ArgFlags collections that represent the legalized return @@ -844,19 +843,17 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { Chains[i] = DAG.getStore(Chain, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), - Add, NULL, Offsets[i], false, 0); + Add, NULL, Offsets[i], false, false, 0); } Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, &Chains[0], NumValues); - } else { - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) continue; - - SDValue RetOp = getValue(I.getOperand(i)); + } else if (I.getNumOperands() != 0) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues) { + SDValue RetOp = getValue(I.getOperand(0)); for (unsigned j = 0, f = NumValues; j != f; ++j) { EVT VT = ValueVTs[j]; @@ -881,7 +878,7 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) { unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector<SDValue, 4> Parts(NumParts); - getCopyToParts(DAG, getCurDebugLoc(), SDNodeOrder, + getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, ExtendKind); @@ -1973,7 
+1970,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, if (Cases.size() >= 2) // Must recompute end() each iteration because it may be // invalidated by erase if we hold on to it - for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) { + for (CaseItr TmpBegin = Cases.begin(), I = TmpBegin, J = ++TmpBegin; J != Cases.end(); ) { const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue(); const APInt& currentValue = cast<ConstantInt>(I->High)->getValue(); MachineBasicBlock* nextBB = J->BB; @@ -2062,9 +2059,15 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { - // Update machine-CFG edges. + // Update machine-CFG edges with unique successors. + SmallVector<BasicBlock*, 32> succs; + succs.reserve(I.getNumSuccessors()); for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) - CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]); + succs.push_back(I.getSuccessor(i)); + array_pod_sort(succs.begin(), succs.end()); + succs.erase(std::unique(succs.begin(), succs.end()), succs.end()); + for (unsigned i = 0, e = succs.size(); i != e; ++i) + CurMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), MVT::Other, getControlRoot(), @@ -2074,7 +2077,7 @@ void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { void SelectionDAGBuilder::visitFSub(User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); - if (isa<VectorType>(Ty)) { + if (Ty->isVectorTy()) { if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { const VectorType *DestTy = cast<VectorType>(I.getType()); const Type *ElTy = DestTy->getElementType(); @@ -2111,7 +2114,7 @@ void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) { void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - if (!isa<VectorType>(I.getType()) && + if (!I.getType()->isVectorTy() && Op2.getValueType() != TLI.getShiftAmountTy()) { // If the operand is smaller than the shift count type, promote it. 
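    // Concrete illustration (assuming a target whose shift-amount type is
    // i32): an IR 'shl i8 %x, i8 %n' reaches here with Op2 of type i8, which
    // does not match TLI.getShiftAmountTy(), so Op2 is promoted before the
    // ISD::SHL node is built.  Vector shifts skip this adjustment because
    // their shift amount is itself a vector carrying one amount per lane.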
EVT PTy = TLI.getPointerTy(); @@ -2699,7 +2702,9 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { SDValue Ptr = getValue(SV); const Type *Ty = I.getType(); + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); SmallVector<EVT, 4> ValueVTs; @@ -2731,7 +2736,8 @@ void SelectionDAGBuilder::visitLoad(LoadInst &I) { PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, - A, SV, Offsets[i], isVolatile, Alignment); + A, SV, Offsets[i], isVolatile, + isNonTemporal, Alignment); Values[i] = L; Chains[i] = L.getValue(1); @@ -2772,6 +2778,7 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { SmallVector<SDValue, 4> Chains(NumValues); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata("nontemporal") != 0; unsigned Alignment = I.getAlignment(); for (unsigned i = 0; i != NumValues; ++i) { @@ -2779,7 +2786,8 @@ void SelectionDAGBuilder::visitStore(StoreInst &I) { DAG.getConstant(Offsets[i], PtrVT)); Chains[i] = DAG.getStore(Root, getCurDebugLoc(), SDValue(Src.getNode(), Src.getResNo() + i), - Add, PtrV, Offsets[i], isVolatile, Alignment); + Add, PtrV, Offsets[i], isVolatile, + isNonTemporal, Alignment); } DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), @@ -2879,7 +2887,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, /// /// where Op is the hexidecimal representation of floating point value. static SDValue -GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) { +GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, @@ -2894,7 +2902,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) { /// where Op is the hexidecimal representation of floating point value. static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, - DebugLoc dl, unsigned Order) { + DebugLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, @@ -3078,13 +3086,13 @@ SelectionDAGBuilder::visitLog(CallInst &I) { SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. - SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3f317218)); // Get the significand and build it into a floating-point number with // exponent of 1. - SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); + SDValue X = GetSignificand(DAG, Op1, dl); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: @@ -3188,11 +3196,11 @@ SelectionDAGBuilder::visitLog2(CallInst &I) { SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Get the exponent. - SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); // Get the significand and build it into a floating-point number with // exponent of 1. - SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); + SDValue X = GetSignificand(DAG, Op1, dl); // Different possible minimax approximations of significand in // floating-point for various degrees of accuracy over [1,2]. 
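The GetExponent and GetSignificand helpers these log lowerings rely on simply slice up the IEEE-754 single-precision bit pattern. A minimal host-side sketch of the arithmetic they implement (the standalone function, its name, and the main() harness are illustrative; only the masks and the exponent bias correspond to the lowering code):

#include <cassert>
#include <cstdint>
#include <cstring>

// Decompose x = significand * 2^exponent with the significand in [1, 2),
// mirroring the AND/SRL/SUB and AND/OR sequences built as DAG nodes above.
static void decomposeFloat(float x, float &Sig, int &Exp) {
  uint32_t Bits;
  std::memcpy(&Bits, &x, sizeof(Bits));                    // Op viewed as i32
  Exp = int((Bits & 0x7f800000u) >> 23) - 127;             // remove the bias
  uint32_t SigBits = (Bits & 0x007fffffu) | 0x3f800000u;   // exponent field = bias, value in [1,2)
  std::memcpy(&Sig, &SigBits, sizeof(Sig));
}

int main() {
  float Sig; int Exp;
  decomposeFloat(10.0f, Sig, Exp);
  assert(Sig == 1.25f && Exp == 3);   // 10.0 = 1.25 * 2^3
  return 0;
}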
@@ -3297,13 +3305,13 @@ SelectionDAGBuilder::visitLog10(CallInst &I) { SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. - SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder); + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, getF32Constant(DAG, 0x3e9a209a)); // Get the significand and build it into a floating-point number with // exponent of 1. - SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder); + SDValue X = GetSignificand(DAG, Op1, dl); if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: @@ -4058,7 +4066,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { // Store the stack protector onto the stack. Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, PseudoSourceValue::getFixedStack(FI), - 0, true); + 0, true, false, 0); setValue(&I, Res); DAG.setRoot(Res); return 0; @@ -4276,8 +4284,8 @@ isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, // Check for a truly no-op bitcast. if (isa<BitCastInst>(U) && (U->getOperand(0)->getType() == U->getType() || - (isa<PointerType>(U->getOperand(0)->getType()) && - isa<PointerType>(U->getType())))) + (U->getOperand(0)->getType()->isPointerTy() && + U->getType()->isPointerTy()))) continue; // Otherwise it's not a true no-op. return false; @@ -4385,7 +4393,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, CS.getCallingConv(), isTailCall, !CS.getInstruction()->use_empty(), - Callee, Args, DAG, getCurDebugLoc(), SDNodeOrder); + Callee, Args, DAG, getCurDebugLoc()); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -4410,7 +4418,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, - Add, NULL, Offsets[i], false, 1); + Add, NULL, Offsets[i], false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -4433,7 +4441,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT); SDValue ReturnValue = - getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs, + getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs, RegisterVT, VT, AssertOp); ReturnValues.push_back(ReturnValue); CurReg += NumRegs; @@ -4512,7 +4520,8 @@ static SDValue getMemCmpLoad(Value *PtrVal, MVT LoadVT, const Type *LoadTy, SDValue Ptr = Builder.getValue(PtrVal); SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, - false /*volatile*/, 1 /* align=1 */); + false /*volatile*/, + false /*nontemporal*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -4529,9 +4538,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { return false; Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); - if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) || - !isa<IntegerType>(I.getOperand(3)->getType()) || - !isa<IntegerType>(I.getType())) + if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || + !I.getOperand(3)->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) return false; ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3)); @@ -4625,7 +4634,7 @@ void 
SelectionDAGBuilder::visitCall(CallInst &I) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf") { if (I.getNumOperands() == 3 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.getType() == I.getOperand(2)->getType()) { SDValue LHS = getValue(I.getOperand(1)); @@ -4636,7 +4645,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType()) { SDValue Tmp = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), @@ -4645,7 +4654,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -4655,7 +4664,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -4665,7 +4674,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPoint() && + I.getOperand(1)->getType()->isFloatingPointTy() && I.getType() == I.getOperand(1)->getType() && I.onlyReadsMemory()) { SDValue Tmp = getValue(I.getOperand(1)); @@ -4699,8 +4708,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - unsigned Order, SDValue &Chain, - SDValue *Flag) const { + SDValue &Chain, SDValue *Flag) const { // Assemble the legal parts into the final values. SmallVector<SDValue, 4> Values(ValueVTs.size()); SmallVector<SDValue, 8> Parts; @@ -4765,7 +4773,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, Parts[i] = P; } - Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(), + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs, RegisterVT, ValueVT); Part += NumRegs; Parts.clear(); @@ -4781,8 +4789,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - unsigned Order, SDValue &Chain, - SDValue *Flag) const { + SDValue &Chain, SDValue *Flag) const { // Get the list of the values's legal parts. 
unsigned NumRegs = Regs.size(); SmallVector<SDValue, 8> Parts(NumRegs); @@ -4791,7 +4798,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); EVT RegisterVT = RegVTs[Value]; - getCopyToParts(DAG, dl, Order, + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT); Part += NumParts; @@ -4832,7 +4839,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, /// values added into it. void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,unsigned MatchingIdx, - SelectionDAG &DAG, unsigned Order, + SelectionDAG &DAG, std::vector<SDValue> &Ops) const { assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!"); unsigned Flag = Code | (Regs.size() << 3); @@ -5330,7 +5337,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurDebugLoc(), - OpInfo.CallOperand, StackSlot, NULL, 0); + OpInfo.CallOperand, StackSlot, NULL, 0, + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -5421,7 +5429,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { 2 /* REGDEF */ , false, 0, - DAG, SDNodeOrder, + DAG, AsmNodeOperands); break; } @@ -5469,10 +5477,10 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // Use the produced MatchedRegs object to MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - SDNodeOrder, Chain, &Flag); + Chain, &Flag); MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, true, OpInfo.getMatchedOperand(), - DAG, SDNodeOrder, AsmNodeOperands); + DAG, AsmNodeOperands); break; } else { assert(((OpFlag & 7) == 4) && "Unknown matching constraint!"); @@ -5533,11 +5541,10 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { } OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), - SDNodeOrder, Chain, &Flag); + Chain, &Flag); OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0, - DAG, SDNodeOrder, - AsmNodeOperands); + DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: { @@ -5545,7 +5552,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */, - false, 0, DAG, SDNodeOrder, + false, 0, DAG, AsmNodeOperands); break; } @@ -5565,7 +5572,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), - SDNodeOrder, Chain, &Flag); + Chain, &Flag); // FIXME: Why don't we do this for inline asms with MRVs? 
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { @@ -5605,7 +5612,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; Value *Ptr = IndirectStoresToEmit[i].second; SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), - SDNodeOrder, Chain, &Flag); + Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -5616,7 +5623,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), - StoresToEmit[i].second, 0); + StoresToEmit[i].second, 0, + false, false, 0); OutChains.push_back(Val); } @@ -5669,8 +5677,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, CallingConv::ID CallConv, bool isTailCall, bool isReturnValueUsed, SDValue Callee, - ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl, - unsigned Order) { + ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) { // Handle all of the outgoing arguments. SmallVector<ISD::OutputArg, 32> Outs; for (unsigned i = 0, e = Args.size(); i != e; ++i) { @@ -5721,7 +5728,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, else if (Args[i].isZExt) ExtendKind = ISD::ZERO_EXTEND; - getCopyToParts(DAG, dl, Order, Op, &Parts[0], NumParts, + getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { @@ -5800,7 +5807,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, EVT RegisterVT = getRegisterType(RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT); - ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg], + ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT, AssertOp)); CurReg += NumRegs; @@ -5840,7 +5847,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), SDNodeOrder, Chain, 0); + RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0); PendingExports.push_back(Chain); } @@ -5966,7 +5973,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { EVT VT = ValueVTs[0]; EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; - SDValue ArgValue = getCopyFromParts(DAG, dl, 0, &InVals[0], 1, + SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); @@ -6000,7 +6007,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { else if (F.paramHasAttr(Idx, Attribute::ZExt)) AssertOp = ISD::AssertZext; - ArgValues.push_back(getCopyFromParts(DAG, dl, 0, &InVals[i], + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, AssertOp)); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index da2e6e4..05f9f1f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -56,9 +56,12 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" #include <algorithm> using namespace llvm; +STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); + static cl::opt<bool> EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, cl::desc("Enable 
verbose messages in the \"fast\" " @@ -723,9 +726,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // code to the MachineBasicBlock. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Selection", GroupName); - InstructionSelect(); + DoInstructionSelection(); } else { - InstructionSelect(); + DoInstructionSelection(); } DEBUG(dbgs() << "Selected selection DAG:\n"); @@ -765,6 +768,66 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(BB->dump()); } +void SelectionDAGISel::DoInstructionSelection() { + DEBUG(errs() << "===== Instruction selection begins:\n"); + + PreprocessISelDAG(); + + // Select target instructions for the DAG. + { + // Number all nodes with a topological order and set DAGSize. + DAGSize = CurDAG->AssignTopologicalOrder(); + + // Create a dummy node (which is not added to allnodes), that adds + // a reference to the root node, preventing it from being deleted, + // and tracking any changes of the root. + HandleSDNode Dummy(CurDAG->getRoot()); + ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()); + ++ISelPosition; + + // The AllNodes list is now topological-sorted. Visit the + // nodes by starting at the end of the list (the root of the + // graph) and preceding back toward the beginning (the entry + // node). + while (ISelPosition != CurDAG->allnodes_begin()) { + SDNode *Node = --ISelPosition; + // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, + // but there are currently some corner cases that it misses. Also, this + // makes it theoretically possible to disable the DAGCombiner. + if (Node->use_empty()) + continue; + + SDNode *ResNode = Select(Node); + + // FIXME: This is pretty gross. 'Select' should be changed to not return + // anything at all and this code should be nuked with a tactical strike. + + // If node should not be replaced, continue with the next one. + if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) + continue; + // Replace node. + if (ResNode) + ReplaceUses(Node, ResNode); + + // If after the replacement this node is not used any more, + // remove this dead node. + if (Node->use_empty()) { // Don't delete EntryToken, etc. + ISelUpdater ISU(ISelPosition); + CurDAG->RemoveDeadNode(Node, &ISU); + } + } + + CurDAG->setRoot(Dummy.getValue()); + } + DEBUG(errs() << "===== Instruction selection ends:\n"); + + PostprocessISelDAG(); + + // FIXME: This shouldn't be needed, remove it. + CurDAG->RemoveDeadNodes(); +} + + void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, MachineFunction &MF, MachineModuleInfo *MMI, @@ -870,6 +933,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // feed PHI nodes in successor blocks. if (isa<TerminatorInst>(BI)) if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) { + ++NumFastIselFailures; ResetDebugLoc(SDB, FastIS); if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; @@ -894,6 +958,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Then handle certain instructions as single-LLVM-Instruction blocks. if (isa<CallInst>(BI)) { + ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; BI->dump(); @@ -923,6 +988,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Otherwise, give up on FastISel for the rest of the block. // For now, be a little lenient about non-branch terminators. 
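// Illustrative sketch, not part of the patch: the NumFastIselFailures counter added
// above uses LLVM's Statistic facility.  A counter is declared with the STATISTIC
// macro (which relies on a DEBUG_TYPE being #defined earlier in the file, as
// SelectionDAGISel.cpp does) and simply incremented at the interesting points;
// values are printed when the tool is run with -stats.  The counter name and helper
// below are hypothetical.
#define DEBUG_TYPE "isel-example"
#include "llvm/ADT/Statistic.h"

STATISTIC(NumExampleMisses, "Number of times the example fallback was taken");

static void noteExampleMiss() {
  ++NumExampleMisses;          // Same idiom as ++NumFastIselFailures above.
}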
if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) { + ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; BI->dump(); @@ -972,6 +1038,8 @@ SelectionDAGISel::FinishBasicBlock() { MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); + if (!BB->isSuccessor(PHI->getParent())) + continue; PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); @@ -1316,13 +1384,29 @@ static SDNode *findFlagUse(SDNode *N) { /// This function recursively traverses up the operand chain, ignoring /// certain nodes. static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, - SDNode *Root, - SmallPtrSet<SDNode*, 16> &Visited) { - if (Use->getNodeId() < Def->getNodeId() || - !Visited.insert(Use)) + SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited, + bool IgnoreChains) { + // The NodeID's are given uniques ID's where a node ID is guaranteed to be + // greater than all of its (recursive) operands. If we scan to a point where + // 'use' is smaller than the node we're scanning for, then we know we will + // never find it. + // + // The Use may be -1 (unassigned) if it is a newly allocated node. This can + // happen because we scan down to newly selected nodes in the case of flag + // uses. + if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) + return false; + + // Don't revisit nodes if we already scanned it and didn't fail, we know we + // won't fail if we scan it again. + if (!Visited.insert(Use)) return false; for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { + // Ignore chain uses, they are validated by HandleMergeInputChains. + if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains) + continue; + SDNode *N = Use->getOperand(i).getNode(); if (N == Def) { if (Use == ImmedUse || Use == Root) @@ -1332,32 +1416,24 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, } // Traverse up the operand chain. - if (findNonImmUse(N, Def, ImmedUse, Root, Visited)) + if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains)) return true; } return false; } -/// isNonImmUse - Start searching from Root up the DAG to check is Def can -/// be reached. Return true if that's the case. However, ignore direct uses -/// by ImmedUse (which would be U in the example illustrated in -/// IsLegalAndProfitableToFold) and by Root (which can happen in the store -/// case). -/// FIXME: to be really generic, we should allow direct use by any node -/// that is being folded. But realisticly since we only fold loads which -/// have one non-chain use, we only need to watch out for load/op/store -/// and load/op/cmp case where the root (store / cmp) may reach the load via -/// its chain operand. -static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) { - SmallPtrSet<SDNode*, 16> Visited; - return findNonImmUse(Root, Def, ImmedUse, Root, Visited); +/// IsProfitableToFold - Returns true if it's profitable to fold the specific +/// operand node N of U during instruction selection that starts at Root. +bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + if (OptLevel == CodeGenOpt::None) return false; + return N.hasOneUse(); } -/// IsLegalAndProfitableToFold - Returns true if the specific operand node N of -/// U can be folded during instruction selection that starts at Root and -/// folding N is profitable. 
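// Illustrative sketch, not part of the patch: findNonImmUse above prunes its upward
// walk using the invariant that topologically assigned node ids are strictly greater
// than the ids of all (transitive) operands, so once the id being scanned drops below
// Def's id the search can stop.  Plain C++ toy graph, no LLVM types; a real walk
// would also keep a visited set.
#include <cassert>
#include <vector>

struct ToyNode {
  int Id;                                  // Topological order number.
  std::vector<const ToyNode *> Operands;
};

static bool Reaches(const ToyNode *Use, const ToyNode *Def) {
  if (Use->Id < Def->Id)
    return false;                          // Operands only ever have smaller ids.
  if (Use == Def)
    return true;
  for (const ToyNode *Op : Use->Operands)
    if (Reaches(Op, Def))
      return true;
  return false;
}

int main() {
  ToyNode A{0, {}}, B{1, {&A}}, C{2, {&B}}, D{3, {&A}};
  assert(Reaches(&C, &A));                 // C -> B -> A is found by walking.
  assert(!Reaches(&B, &D));                // Pruned immediately: id 1 < id 3.
  return 0;
}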
-bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +/// IsLegalToFold - Returns true if the specific operand node N of +/// U can be folded during instruction selection that starts at Root. +bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, + bool IgnoreChains) const { if (OptLevel == CodeGenOpt::None) return false; // If Root use can somehow reach N through a path that that doesn't contain @@ -1402,6 +1478,8 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, // Fold. But since Fold and FU are flagged together, this will create // a cycle in the scheduling graph. + // If the node has flags, walk down the graph to the "lowest" node in the + // flagged set. EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Flag) { SDNode *FU = findFlagUse(Root); @@ -1409,9 +1487,17 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, break; Root = FU; VT = Root->getValueType(Root->getNumValues()-1); + + // If our query node has a flag result with a use, we've walked up it. If + // the user (which has already been selected) has a chain or indirectly uses + // the chain, our WalkChainUsers predicate will not consider it. Because of + // this, we cannot ignore chains in this predicate. + IgnoreChains = false; } + - return !isNonImmUse(Root, N, U); + SmallPtrSet<SDNode*, 16> Visited; + return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); } SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { @@ -1423,6 +1509,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { VTs.push_back(MVT::Flag); SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); + New->setNodeId(-1); return New.getNode(); } @@ -1438,25 +1525,1219 @@ SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) { MVT::Other, Tmp, Chain); } +/// GetVBR - decode a vbr encoding whose top bit is set. +ALWAYS_INLINE static uint64_t +GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { + assert(Val >= 128 && "Not a VBR"); + Val &= 127; // Remove first vbr bit. + + unsigned Shift = 7; + uint64_t NextBits; + do { + NextBits = MatcherTable[Idx++]; + Val |= (NextBits&127) << Shift; + Shift += 7; + } while (NextBits & 128); + + return Val; +} + + +/// UpdateChainsAndFlags - When a match is complete, this method updates uses of +/// interior flag and chain results to use the new flag and chain results. +void SelectionDAGISel:: +UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode*> &ChainNodesMatched, + SDValue InputFlag, + const SmallVectorImpl<SDNode*> &FlagResultNodesMatched, + bool isMorphNodeTo) { + SmallVector<SDNode*, 4> NowDeadNodes; + + ISelUpdater ISU(ISelPosition); + + // Now that all the normal results are replaced, we replace the chain and + // flag results if present. + if (!ChainNodesMatched.empty()) { + assert(InputChain.getNode() != 0 && + "Matched input chains but didn't produce a chain"); + // Loop over all of the nodes we matched that produced a chain result. + // Replace all the chain results with the final chain we ended up with. + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + SDNode *ChainNode = ChainNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (ChainNode->getOpcode() == ISD::DELETED_NODE) + continue; + + // Don't replace the results of the root node if we're doing a + // MorphNodeTo. 
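// Illustrative sketch, not part of the patch: how the VBR encoding decoded by GetVBR
// above works.  Each table byte carries 7 payload bits and bit 7 says "more bytes
// follow"; GetVBR is only invoked once the first byte was seen to have bit 7 set.
// Standalone C++, table bytes invented for the example.
#include <cassert>
#include <cstdint>
#include <vector>

static uint64_t DecodeVBR(const std::vector<uint8_t> &Table, unsigned &Idx) {
  uint64_t Val = Table[Idx++];
  if (Val < 128)
    return Val;                 // Single byte, no continuation bit set.
  Val &= 127;                   // Strip the continuation bit, as GetVBR does.
  unsigned Shift = 7;
  uint64_t Next;
  do {
    Next = Table[Idx++];
    Val |= (Next & 127) << Shift;
    Shift += 7;
  } while (Next & 128);
  return Val;
}

int main() {
  // 300 = 0b1'0010'1100: the low 7 bits are 44 (emitted with bit 7 set), then 2.
  std::vector<uint8_t> Table = {44 | 128, 2};
  unsigned Idx = 0;
  assert(DecodeVBR(Table, Idx) == 300 && Idx == 2);
  return 0;
}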
+ if (ChainNode == NodeToMatch && isMorphNodeTo) + continue; + + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); + if (ChainVal.getValueType() == MVT::Flag) + ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); + assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU); + + // If the node became dead, delete it. + if (ChainNode->use_empty()) + NowDeadNodes.push_back(ChainNode); + } + } + + // If the result produces a flag, update any flag results in the matched + // pattern with the flag result. + if (InputFlag.getNode() != 0) { + // Handle any interior nodes explicitly marked. + for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) { + SDNode *FRN = FlagResultNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (FRN->getOpcode() == ISD::DELETED_NODE) + continue; + + assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag && + "Doesn't have a flag result"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), + InputFlag, &ISU); + + // If the node became dead, delete it. + if (FRN->use_empty()) + NowDeadNodes.push_back(FRN); + } + } + + if (!NowDeadNodes.empty()) + CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU); + + DEBUG(errs() << "ISEL: Match complete!\n"); +} + +enum ChainResult { + CR_Simple, + CR_InducesCycle, + CR_LeadsToInteriorNode +}; + +/// WalkChainUsers - Walk down the users of the specified chained node that is +/// part of the pattern we're matching, looking at all of the users we find. +/// This determines whether something is an interior node, whether we have a +/// non-pattern node in between two pattern nodes (which prevent folding because +/// it would induce a cycle) and whether we have a TokenFactor node sandwiched +/// between pattern nodes (in which case the TF becomes part of the pattern). +/// +/// The walk we do here is guaranteed to be small because we quickly get down to +/// already selected nodes "below" us. +static ChainResult +WalkChainUsers(SDNode *ChainedNode, + SmallVectorImpl<SDNode*> &ChainedNodesInPattern, + SmallVectorImpl<SDNode*> &InteriorChainedNodes) { + ChainResult Result = CR_Simple; + + for (SDNode::use_iterator UI = ChainedNode->use_begin(), + E = ChainedNode->use_end(); UI != E; ++UI) { + // Make sure the use is of the chain, not some other value we produce. + if (UI.getUse().getValueType() != MVT::Other) continue; + + SDNode *User = *UI; + + // If we see an already-selected machine node, then we've gone beyond the + // pattern that we're selecting down into the already selected chunk of the + // DAG. + if (User->isMachineOpcode() || + User->getOpcode() == ISD::HANDLENODE) // Root of the graph. + continue; + + if (User->getOpcode() == ISD::CopyToReg || + User->getOpcode() == ISD::CopyFromReg || + User->getOpcode() == ISD::INLINEASM) { + // If their node ID got reset to -1 then they've already been selected. + // Treat them like a MachineOpcode. + if (User->getNodeId() == -1) + continue; + } + + // If we have a TokenFactor, we handle it specially. + if (User->getOpcode() != ISD::TokenFactor) { + // If the node isn't a token factor and isn't part of our pattern, then it + // must be a random chained node in between two nodes we're selecting. + // This happens when we have something like: + // x = load ptr + // call + // y = x+4 + // store y -> ptr + // Because we structurally match the load/store as a read/modify/write, + // but the call is chained between them. 
We cannot fold in this case + // because it would induce a cycle in the graph. + if (!std::count(ChainedNodesInPattern.begin(), + ChainedNodesInPattern.end(), User)) + return CR_InducesCycle; + + // Otherwise we found a node that is part of our pattern. For example in: + // x = load ptr + // y = x+4 + // store y -> ptr + // This would happen when we're scanning down from the load and see the + // store as a user. Record that there is a use of ChainedNode that is + // part of the pattern and keep scanning uses. + Result = CR_LeadsToInteriorNode; + InteriorChainedNodes.push_back(User); + continue; + } + + // If we found a TokenFactor, there are two cases to consider: first if the + // TokenFactor is just hanging "below" the pattern we're matching (i.e. no + // uses of the TF are in our pattern) we just want to ignore it. Second, + // the TokenFactor can be sandwiched in between two chained nodes, like so: + // [Load chain] + // ^ + // | + // [Load] + // ^ ^ + // | \ DAG's like cheese + // / \ do you? + // / | + // [TokenFactor] [Op] + // ^ ^ + // | | + // \ / + // \ / + // [Store] + // + // In this case, the TokenFactor becomes part of our match and we rewrite it + // as a new TokenFactor. + // + // To distinguish these two cases, do a recursive walk down the uses. + switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) { + case CR_Simple: + // If the uses of the TokenFactor are just already-selected nodes, ignore + // it, it is "below" our pattern. + continue; + case CR_InducesCycle: + // If the uses of the TokenFactor lead to nodes that are not part of our + // pattern that are not selected, folding would turn this into a cycle, + // bail out now. + return CR_InducesCycle; + case CR_LeadsToInteriorNode: + break; // Otherwise, keep processing. + } + + // Okay, we know we're in the interesting interior case. The TokenFactor + // is now going to be considered part of the pattern so that we rewrite its + // uses (it may have uses that are not part of the pattern) with the + // ultimate chain result of the generated code. We will also add its chain + // inputs as inputs to the ultimate TokenFactor we create. + Result = CR_LeadsToInteriorNode; + ChainedNodesInPattern.push_back(User); + InteriorChainedNodes.push_back(User); + continue; + } + + return Result; +} + +/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains +/// operation for when the pattern matched at least one node with a chains. The +/// input vector contains a list of all of the chained nodes that we match. We +/// must determine if this is a valid thing to cover (i.e. matching it won't +/// induce cycles in the DAG) and if so, creating a TokenFactor node. that will +/// be used as the input node chain for the generated nodes. +static SDValue +HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, + SelectionDAG *CurDAG) { + // Walk all of the chained nodes we've matched, recursively scanning down the + // users of the chain result. This adds any TokenFactor nodes that are caught + // in between chained nodes to the chained and interior nodes list. + SmallVector<SDNode*, 3> InteriorChainedNodes; + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, + InteriorChainedNodes) == CR_InducesCycle) + return SDValue(); // Would induce a cycle. + } + + // Okay, we have walked all the matched nodes and collected TokenFactor nodes + // that we are interested in. Form our input TokenFactor node. 
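// Illustrative sketch, not part of the patch: the core of the chain merging that
// HandleMergeInputChains performs here.  Matched nodes whose chain input is another
// matched node are "interior" and contribute nothing; every other matched node
// contributes its input chain, and the collected chains become the operands of the
// TokenFactor.  Toy single-chain nodes, no LLVM types; the real code additionally
// has to absorb TokenFactors sandwiched between matched nodes.
#include <algorithm>
#include <cassert>
#include <vector>

struct ToyChained {
  ToyChained *ChainInput;      // Single chain operand, for simplicity.
};

static std::vector<ToyChained *>
CollectInputChains(const std::vector<ToyChained *> &Matched) {
  std::vector<ToyChained *> Inputs;
  for (ToyChained *N : Matched) {
    ToyChained *In = N->ChainInput;
    if (std::count(Matched.begin(), Matched.end(), In) == 0)
      Inputs.push_back(In);    // Would become a TokenFactor operand.
  }
  return Inputs;
}

int main() {
  ToyChained Entry{nullptr};
  ToyChained Load{&Entry};     // load:  chain comes from outside the pattern.
  ToyChained Store{&Load};     // store: chain comes from the matched load.
  std::vector<ToyChained *> Matched = {&Load, &Store};
  std::vector<ToyChained *> Inputs = CollectInputChains(Matched);
  assert(Inputs.size() == 1 && Inputs[0] == &Entry);
  return 0;
}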
+ SmallVector<SDValue, 3> InputChains; + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + // Add the input chain of this node to the InputChains list (which will be + // the operands of the generated TokenFactor) if it's not an interior node. + SDNode *N = ChainNodesMatched[i]; + if (N->getOpcode() != ISD::TokenFactor) { + if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) + continue; + + // Otherwise, add the input chain. + SDValue InChain = ChainNodesMatched[i]->getOperand(0); + assert(InChain.getValueType() == MVT::Other && "Not a chain"); + InputChains.push_back(InChain); + continue; + } + + // If we have a token factor, we want to add all inputs of the token factor + // that are not part of the pattern we're matching. + for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { + if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), + N->getOperand(op).getNode())) + InputChains.push_back(N->getOperand(op)); + } + } + + SDValue Res; + if (InputChains.size() == 1) + return InputChains[0]; + return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(), + MVT::Other, &InputChains[0], InputChains.size()); +} + +/// MorphNode - Handle morphing a node in place for the selector. +SDNode *SelectionDAGISel:: +MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) { + // It is possible we're using MorphNodeTo to replace a node with no + // normal results with one that has a normal result (or we could be + // adding a chain) and the input could have flags and chains as well. + // In this case we need to shifting the operands down. + // FIXME: This is a horrible hack and broken in obscure cases, no worse + // than the old isel though. We should sink this into MorphNodeTo. + int OldFlagResultNo = -1, OldChainResultNo = -1; + + unsigned NTMNumResults = Node->getNumValues(); + if (Node->getValueType(NTMNumResults-1) == MVT::Flag) { + OldFlagResultNo = NTMNumResults-1; + if (NTMNumResults != 1 && + Node->getValueType(NTMNumResults-2) == MVT::Other) + OldChainResultNo = NTMNumResults-2; + } else if (Node->getValueType(NTMNumResults-1) == MVT::Other) + OldChainResultNo = NTMNumResults-1; + + // Call the underlying SelectionDAG routine to do the transmogrification. Note + // that this deletes operands of the old node that become dead. + SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps); + + // MorphNodeTo can operate in two ways: if an existing node with the + // specified operands exists, it can just return it. Otherwise, it + // updates the node in place to have the requested operands. + if (Res == Node) { + // If we updated the node in place, reset the node ID. To the isel, + // this should be just like a newly allocated machine node. + Res->setNodeId(-1); + } + + unsigned ResNumResults = Res->getNumValues(); + // Move the flag if needed. + if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 && + (unsigned)OldFlagResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo), + SDValue(Res, ResNumResults-1)); + + if ((EmitNodeInfo & OPFL_FlagOutput) != 0) + --ResNumResults; + + // Move the chain reference if needed. 
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && + (unsigned)OldChainResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), + SDValue(Res, ResNumResults-1)); + + // Otherwise, no replacement happened because the node already exists. Replace + // Uses of the old node with the new one. + if (Res != Node) + CurDAG->ReplaceAllUsesWith(Node, Res); + + return Res; +} + +/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +ALWAYS_INLINE static bool +CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) { + // Accept if it is exactly the same as a previously recorded node. + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + return N == RecordedNodes[RecNo]; +} + +/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +ALWAYS_INLINE static bool +CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SelectionDAGISel &SDISel) { + return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); +} + +/// CheckNodePredicate - Implements OP_CheckNodePredicate. +ALWAYS_INLINE static bool +CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SelectionDAGISel &SDISel, SDNode *N) { + return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); +} + +ALWAYS_INLINE static bool +CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDNode *N) { + return N->getOpcode() == MatcherTable[MatcherIndex++]; +} + +ALWAYS_INLINE static bool +CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (N.getValueType() == VT) return true; + + // Handle the case when VT is iPTR. + return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); +} + +ALWAYS_INLINE static bool +CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); +} + + +ALWAYS_INLINE static bool +CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + return cast<CondCodeSDNode>(N)->get() == + (ISD::CondCode)MatcherTable[MatcherIndex++]; +} + +ALWAYS_INLINE static bool +CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering &TLI) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (cast<VTSDNode>(N)->getVT() == VT) + return true; + + // Handle the case when VT is iPTR. 
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy(); +} + +ALWAYS_INLINE static bool +CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + return C != 0 && C->getSExtValue() == Val; +} + +ALWAYS_INLINE static bool +CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, SelectionDAGISel &SDISel) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + if (N->getOpcode() != ISD::AND) return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val); +} + +ALWAYS_INLINE static bool +CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, SelectionDAGISel &SDISel) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + if (N->getOpcode() != ISD::OR) return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val); +} + +/// IsPredicateKnownToFail - If we know how and can do so without pushing a +/// scope, evaluate the current node. If the current predicate is known to +/// fail, set Result=true and return anything. If the current predicate is +/// known to pass, set Result=false and return the MatcherIndex to continue +/// with. If the current predicate is unknown, set Result=false and return the +/// MatcherIndex to continue with. +static unsigned IsPredicateKnownToFail(const unsigned char *Table, + unsigned Index, SDValue N, + bool &Result, SelectionDAGISel &SDISel, + SmallVectorImpl<SDValue> &RecordedNodes){ + switch (Table[Index++]) { + default: + Result = false; + return Index-1; // Could not evaluate this predicate. 
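// Illustrative sketch, not part of the patch: the idea behind IsPredicateKnownToFail
// here.  Before pushing backtracking state for a Scope alternative, peek at its first
// check; if that check is stateless and already known to fail (an opcode mismatch,
// say), skip the alternative without saving or restoring anything.  Opcodes and table
// layout below are invented for the example.
#include <cassert>
#include <cstdint>
#include <vector>

enum : uint8_t { OPC_CheckOpcodeToy = 1, OPC_SomethingElseToy = 2 };

static bool KnownToFail(const std::vector<uint8_t> &Table, unsigned Idx,
                        uint8_t NodeOpcode) {
  if (Table[Idx] != OPC_CheckOpcodeToy)
    return false;                       // Can't decide cheaply; push a scope.
  return Table[Idx + 1] != NodeOpcode;  // Decided right here, no scope needed.
}

int main() {
  std::vector<uint8_t> Table = {OPC_CheckOpcodeToy, /*expected=*/13,
                                OPC_SomethingElseToy};
  assert(KnownToFail(Table, 0, /*node opcode=*/7));    // Mismatch: skip it.
  assert(!KnownToFail(Table, 0, /*node opcode=*/13));  // Might match: try it.
  return 0;
}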
+ case SelectionDAGISel::OPC_CheckSame: + Result = !::CheckSame(Table, Index, N, RecordedNodes); + return Index; + case SelectionDAGISel::OPC_CheckPatternPredicate: + Result = !::CheckPatternPredicate(Table, Index, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckPredicate: + Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckOpcode: + Result = !::CheckOpcode(Table, Index, N.getNode()); + return Index; + case SelectionDAGISel::OPC_CheckType: + Result = !::CheckType(Table, Index, N, SDISel.TLI); + return Index; + case SelectionDAGISel::OPC_CheckChild0Type: + case SelectionDAGISel::OPC_CheckChild1Type: + case SelectionDAGISel::OPC_CheckChild2Type: + case SelectionDAGISel::OPC_CheckChild3Type: + case SelectionDAGISel::OPC_CheckChild4Type: + case SelectionDAGISel::OPC_CheckChild5Type: + case SelectionDAGISel::OPC_CheckChild6Type: + case SelectionDAGISel::OPC_CheckChild7Type: + Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); + return Index; + case SelectionDAGISel::OPC_CheckCondCode: + Result = !::CheckCondCode(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckValueType: + Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + return Index; + case SelectionDAGISel::OPC_CheckInteger: + Result = !::CheckInteger(Table, Index, N); + return Index; + case SelectionDAGISel::OPC_CheckAndImm: + Result = !::CheckAndImm(Table, Index, N, SDISel); + return Index; + case SelectionDAGISel::OPC_CheckOrImm: + Result = !::CheckOrImm(Table, Index, N, SDISel); + return Index; + } +} + + +struct MatchScope { + /// FailIndex - If this match fails, this is the index to continue with. + unsigned FailIndex; + + /// NodeStack - The node stack when the scope was formed. + SmallVector<SDValue, 4> NodeStack; + + /// NumRecordedNodes - The number of recorded nodes when the scope was formed. + unsigned NumRecordedNodes; + + /// NumMatchedMemRefs - The number of matched memref entries. + unsigned NumMatchedMemRefs; + + /// InputChain/InputFlag - The current chain/flag + SDValue InputChain, InputFlag; + + /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty. + bool HasChainNodesMatched, HasFlagResultNodesMatched; +}; + +SDNode *SelectionDAGISel:: +SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, + unsigned TableSize) { + // FIXME: Should these even be selected? Handle these cases in the caller? + switch (NodeToMatch->getOpcode()) { + default: + break; + case ISD::EntryToken: // These nodes remain the same. + case ISD::BasicBlock: + case ISD::Register: + case ISD::HANDLENODE: + case ISD::TargetConstant: + case ISD::TargetConstantFP: + case ISD::TargetConstantPool: + case ISD::TargetFrameIndex: + case ISD::TargetExternalSymbol: + case ISD::TargetBlockAddress: + case ISD::TargetJumpTable: + case ISD::TargetGlobalTLSAddress: + case ISD::TargetGlobalAddress: + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + NodeToMatch->setNodeId(-1); // Mark selected. 
+ return 0; + case ISD::AssertSext: + case ISD::AssertZext: + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), + NodeToMatch->getOperand(0)); + return 0; + case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::EH_LABEL: return Select_EH_LABEL(NodeToMatch); + case ISD::UNDEF: return Select_UNDEF(NodeToMatch); + } + + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); + + // Set up the node stack with NodeToMatch as the only node on the stack. + SmallVector<SDValue, 8> NodeStack; + SDValue N = SDValue(NodeToMatch, 0); + NodeStack.push_back(N); + + // MatchScopes - Scopes used when matching, if a match failure happens, this + // indicates where to continue checking. + SmallVector<MatchScope, 8> MatchScopes; + + // RecordedNodes - This is the set of nodes that have been recorded by the + // state machine. + SmallVector<SDValue, 8> RecordedNodes; + + // MatchedMemRefs - This is the set of MemRef's we've seen in the input + // pattern. + SmallVector<MachineMemOperand*, 2> MatchedMemRefs; + + // These are the current input chain and flag for use when generating nodes. + // Various Emit operations change these. For example, emitting a copytoreg + // uses and updates these. + SDValue InputChain, InputFlag; + + // ChainNodesMatched - If a pattern matches nodes that have input/output + // chains, the OPC_EmitMergeInputChains operation is emitted which indicates + // which ones they are. The result is captured into this list so that we can + // update the chain results when the pattern is complete. + SmallVector<SDNode*, 3> ChainNodesMatched; + SmallVector<SDNode*, 3> FlagResultNodesMatched; + + DEBUG(errs() << "ISEL: Starting pattern match on root node: "; + NodeToMatch->dump(CurDAG); + errs() << '\n'); + + // Determine where to start the interpreter. Normally we start at opcode #0, + // but if the state machine starts with an OPC_SwitchOpcode, then we + // accelerate the first lookup (which is guaranteed to be hot) with the + // OpcodeOffset table. + unsigned MatcherIndex = 0; + + if (!OpcodeOffset.empty()) { + // Already computed the OpcodeOffset table, just index into it. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n"); + + } else if (MatcherTable[0] == OPC_SwitchOpcode) { + // Otherwise, the table isn't computed, but the state machine does start + // with an OPC_SwitchOpcode instruction. Populate the table now, since this + // is the first time we're selecting an instruction. + unsigned Idx = 1; + while (1) { + // Get the size of this case. + unsigned CaseSize = MatcherTable[Idx++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, Idx); + if (CaseSize == 0) break; + + // Get the opcode, add the index to the table. + unsigned Opc = MatcherTable[Idx++]; + if (Opc >= OpcodeOffset.size()) + OpcodeOffset.resize((Opc+1)*2); + OpcodeOffset[Opc] = Idx; + Idx += CaseSize; + } + + // Okay, do the lookup for the first opcode. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + } + + while (1) { + assert(MatcherIndex < TableSize && "Invalid index"); + BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; + switch (Opcode) { + case OPC_Scope: { + // Okay, the semantics of this operation are that we should push a scope + // then evaluate the first child. However, pushing a scope only to have + // the first check fail (which then pops it) is inefficient. 
If we can + // determine immediately that the first check (or first several) will + // immediately fail, don't even bother pushing a scope for them. + unsigned FailIndex; + + while (1) { + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + // Found the end of the scope with no match. + if (NumToSkip == 0) { + FailIndex = 0; + break; + } + + FailIndex = MatcherIndex+NumToSkip; + + // If we can't evaluate this predicate without pushing a scope (e.g. if + // it is a 'MoveParent') or if the predicate succeeds on this node, we + // push the scope and evaluate the full predicate chain. + bool Result; + MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, + Result, *this, RecordedNodes); + if (!Result) + break; + + DEBUG(errs() << " Skipped scope entry at index " << MatcherIndex + << " continuing at " << FailIndex << "\n"); + + + // Otherwise, we know that this case of the Scope is guaranteed to fail, + // move to the next case. + MatcherIndex = FailIndex; + } + + // If the whole scope failed to match, bail. + if (FailIndex == 0) break; + + // Push a MatchScope which indicates where to go if the first child fails + // to match. + MatchScope NewEntry; + NewEntry.FailIndex = FailIndex; + NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); + NewEntry.NumRecordedNodes = RecordedNodes.size(); + NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); + NewEntry.InputChain = InputChain; + NewEntry.InputFlag = InputFlag; + NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); + NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty(); + MatchScopes.push_back(NewEntry); + continue; + } + case OPC_RecordNode: + // Remember this node, it may end up being an operand in the pattern. + RecordedNodes.push_back(N); + continue; + + case OPC_RecordChild0: case OPC_RecordChild1: + case OPC_RecordChild2: case OPC_RecordChild3: + case OPC_RecordChild4: case OPC_RecordChild5: + case OPC_RecordChild6: case OPC_RecordChild7: { + unsigned ChildNo = Opcode-OPC_RecordChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + + RecordedNodes.push_back(N->getOperand(ChildNo)); + continue; + } + case OPC_RecordMemRef: + MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); + continue; + + case OPC_CaptureFlagInput: + // If the current node has an input flag, capture it in InputFlag. + if (N->getNumOperands() != 0 && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) + InputFlag = N->getOperand(N->getNumOperands()-1); + continue; + + case OPC_MoveChild: { + unsigned ChildNo = MatcherTable[MatcherIndex++]; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + + case OPC_MoveParent: + // Pop the current node off the NodeStack. 
+ NodeStack.pop_back(); + assert(!NodeStack.empty() && "Node stack imbalance!"); + N = NodeStack.back(); + continue; + + case OPC_CheckSame: + if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; + continue; + case OPC_CheckPatternPredicate: + if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; + continue; + case OPC_CheckPredicate: + if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, + N.getNode())) + break; + continue; + case OPC_CheckComplexPat: { + unsigned CPNum = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum, + RecordedNodes)) + break; + continue; + } + case OPC_CheckOpcode: + if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; + continue; + + case OPC_CheckType: + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; + continue; + + case OPC_SwitchOpcode: { + unsigned CurNodeOpcode = N.getOpcode(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + // If the opcode matches, then we will execute this case. + if (CurNodeOpcode == MatcherTable[MatcherIndex++]) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. + DEBUG(errs() << " OpcodeSwitch from " << SwitchStart + << " to " << MatcherIndex << "\n"); + continue; + } + + case OPC_SwitchType: { + MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy; + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + MVT::SimpleValueType CaseVT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (CaseVT == MVT::iPTR) + CaseVT = TLI.getPointerTy().SimpleTy; + + // If the VT matches, then we will execute this case. + if (CurNodeVT == CaseVT) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. 
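// Illustrative sketch, not part of the patch: the size-prefixed case scan that
// OPC_SwitchOpcode and OPC_SwitchType perform above.  Each case is encoded as
// <size><key><body...>, so a non-matching case is skipped in one step without
// decoding its body (real tables also allow the size to be VBR-encoded).  Table
// contents are invented for the example.
#include <cassert>
#include <cstdint>
#include <vector>

// Returns the index of the matching case's body, or ~0u if no case matches.
static unsigned ScanCases(const std::vector<uint8_t> &Table, unsigned Idx,
                          uint8_t Key) {
  for (;;) {
    unsigned CaseSize = Table[Idx++];
    if (CaseSize == 0)
      return ~0u;                       // End-of-switch sentinel.
    uint8_t CaseKey = Table[Idx++];
    if (CaseKey == Key)
      return Idx;                       // Execute this case.
    Idx += CaseSize;                    // Skip the whole body otherwise.
  }
}

int main() {
  // A case for key 7 with a 3-byte body, a case for key 9 with a 1-byte body, end.
  std::vector<uint8_t> Table = {3, 7, 0xAA, 0xBB, 0xCC, 1, 9, 0xDD, 0};
  assert(ScanCases(Table, 0, 9) == 7);    // Key-9 body starts at index 7.
  assert(ScanCases(Table, 0, 5) == ~0u);  // No such case.
  return 0;
}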
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); + continue; + } + case OPC_CheckChild0Type: case OPC_CheckChild1Type: + case OPC_CheckChild2Type: case OPC_CheckChild3Type: + case OPC_CheckChild4Type: case OPC_CheckChild5Type: + case OPC_CheckChild6Type: case OPC_CheckChild7Type: + if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + Opcode-OPC_CheckChild0Type)) + break; + continue; + case OPC_CheckCondCode: + if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckValueType: + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + continue; + case OPC_CheckInteger: + if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckAndImm: + if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + case OPC_CheckOrImm: + if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + + case OPC_CheckFoldableChainNode: { + assert(NodeStack.size() != 1 && "No parent node"); + // Verify that all intermediate nodes between the root and this one have + // a single use. + bool HasMultipleUses = false; + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) + if (!NodeStack[i].hasOneUse()) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + + // Check to see that the target thinks this is profitable to fold and that + // we can fold it without inducing cycles in the graph. + if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch) || + !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch, true/*We validate our own chains*/)) + break; + + continue; + } + case OPC_EmitInteger: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT)); + continue; + } + case OPC_EmitRegister: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT)); + continue; + } + + case OPC_EmitConvertToTarget: { + // Convert from IMM/FPIMM to target version. + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Imm = RecordedNodes[RecNo]; + + if (Imm->getOpcode() == ISD::Constant) { + int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue(); + Imm = CurDAG->getTargetConstant(Val, Imm.getValueType()); + } else if (Imm->getOpcode() == ISD::ConstantFP) { + const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); + Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); + } + + RecordedNodes.push_back(Imm); + continue; + } + + case OPC_EmitMergeInputChains: { + assert(InputChain.getNode() == 0 && + "EmitMergeInputChains should be the first chain producing node"); + // This node gets a list of nodes we matched in the input that have + // chains. We want to token factor all of the input chains to these nodes + // together. However, if any of the input chains is actually one of the + // nodes matched in this pattern, then we have an intra-match reference. + // Ignore these because the newly token factored chain should not refer to + // the old nodes. 
+ unsigned NumChains = MatcherTable[MatcherIndex++]; + assert(NumChains != 0 && "Can't TF zero chains"); + + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + for (unsigned i = 0; i != NumChains; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + } + + // If the inner loop broke out, the match fails. + if (ChainNodesMatched.empty()) + break; + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (InputChain.getNode() == 0) + break; // Failed to merge. + + continue; + } + + case OPC_EmitCopyToReg: { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + + if (InputChain.getNode() == 0) + InputChain = CurDAG->getEntryNode(); + + InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(), + DestPhysReg, RecordedNodes[RecNo], + InputFlag); + + InputFlag = InputChain.getValue(1); + continue; + } + + case OPC_EmitNodeXForm: { + unsigned XFormNo = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo)); + continue; + } + + case OPC_EmitNode: + case OPC_MorphNodeTo: { + uint16_t TargetOpc = MatcherTable[MatcherIndex++]; + TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; + // Get the result VT list. + unsigned NumVTs = MatcherTable[MatcherIndex++]; + SmallVector<EVT, 4> VTs; + for (unsigned i = 0; i != NumVTs; ++i) { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy; + VTs.push_back(VT); + } + + if (EmitNodeInfo & OPFL_Chain) + VTs.push_back(MVT::Other); + if (EmitNodeInfo & OPFL_FlagOutput) + VTs.push_back(MVT::Flag); + + // This is hot code, so optimize the two most common cases of 1 and 2 + // results. + SDVTList VTList; + if (VTs.size() == 1) + VTList = CurDAG->getVTList(VTs[0]); + else if (VTs.size() == 2) + VTList = CurDAG->getVTList(VTs[0], VTs[1]); + else + VTList = CurDAG->getVTList(VTs.data(), VTs.size()); + + // Get the operand list. + unsigned NumOps = MatcherTable[MatcherIndex++]; + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumOps; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); + Ops.push_back(RecordedNodes[RecNo]); + } + + // If there are variadic operands to add, handle them now. + if (EmitNodeInfo & OPFL_VariadicInfo) { + // Determine the start index to copy from. + unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo); + FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; + assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && + "Invalid variadic node"); + // Copy all of the variadic operands, not including a potential flag + // input. 
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); + i != e; ++i) { + SDValue V = NodeToMatch->getOperand(i); + if (V.getValueType() == MVT::Flag) break; + Ops.push_back(V); + } + } + + // If this has chain/flag inputs, add them. + if (EmitNodeInfo & OPFL_Chain) + Ops.push_back(InputChain); + if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0) + Ops.push_back(InputFlag); + + // Create the node. + SDNode *Res = 0; + if (Opcode != OPC_MorphNodeTo) { + // If this is a normal EmitNode command, just create the new node and + // add the results to the RecordedNodes list. + Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(), + VTList, Ops.data(), Ops.size()); + + // Add all the non-flag/non-chain results to the RecordedNodes list. + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break; + RecordedNodes.push_back(SDValue(Res, i)); + } + + } else { + Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), + EmitNodeInfo); + } + + // If the node had chain/flag results, update our notion of the current + // chain and flag. + if (EmitNodeInfo & OPFL_FlagOutput) { + InputFlag = SDValue(Res, VTs.size()-1); + if (EmitNodeInfo & OPFL_Chain) + InputChain = SDValue(Res, VTs.size()-2); + } else if (EmitNodeInfo & OPFL_Chain) + InputChain = SDValue(Res, VTs.size()-1); + + // If the OPFL_MemRefs flag is set on this node, slap all of the + // accumulated memrefs onto it. + // + // FIXME: This is vastly incorrect for patterns with multiple outputs + // instructions that access memory and for ComplexPatterns that match + // loads. + if (EmitNodeInfo & OPFL_MemRefs) { + MachineSDNode::mmo_iterator MemRefs = + MF->allocateMemRefsArray(MatchedMemRefs.size()); + std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs); + cast<MachineSDNode>(Res) + ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size()); + } + + DEBUG(errs() << " " + << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") + << " node: "; Res->dump(CurDAG); errs() << "\n"); + + // If this was a MorphNodeTo then we're completely done! + if (Opcode == OPC_MorphNodeTo) { + // Update chain and flag uses. + UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, + InputFlag, FlagResultNodesMatched, true); + return Res; + } + + continue; + } + + case OPC_MarkFlagResults: { + unsigned NumNodes = MatcherTable[MatcherIndex++]; + + // Read and remember all the flag-result nodes. + for (unsigned i = 0; i != NumNodes; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode()); + } + continue; + } + + case OPC_CompleteMatch: { + // The match has been completed, and any new nodes (if any) have been + // created. Patch up references to the matched dag to use the newly + // created nodes. + unsigned NumResults = MatcherTable[MatcherIndex++]; + + for (unsigned i = 0; i != NumResults; ++i) { + unsigned ResSlot = MatcherTable[MatcherIndex++]; + if (ResSlot & 128) + ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); + + assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame"); + SDValue Res = RecordedNodes[ResSlot]; + + // FIXME2: Eliminate this horrible hack by fixing the 'Gen' program + // after (parallel) on input patterns are removed. This would also + // allow us to stop encoding #results in OPC_CompleteMatch's table + // entry. 
+ if (NodeToMatch->getNumValues() <= i || + NodeToMatch->getValueType(i) == MVT::Other || + NodeToMatch->getValueType(i) == MVT::Flag) + break; + assert((NodeToMatch->getValueType(i) == Res.getValueType() || + NodeToMatch->getValueType(i) == MVT::iPTR || + Res.getValueType() == MVT::iPTR || + NodeToMatch->getValueType(i).getSizeInBits() == + Res.getValueType().getSizeInBits()) && + "invalid replacement"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); + } + + // If the root node defines a flag, add it to the flag nodes to update + // list. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag) + FlagResultNodesMatched.push_back(NodeToMatch); + + // Update chain and flag uses. + UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched, + InputFlag, FlagResultNodesMatched, false); + + assert(NodeToMatch->use_empty() && + "Didn't replace all uses of the node?"); + + // FIXME: We just return here, which interacts correctly with SelectRoot + // above. We should fix this to not return an SDNode* anymore. + return 0; + } + } + + // If the code reached this point, then the match failed. See if there is + // another child to try in the current 'Scope', otherwise pop it until we + // find a case to check. + while (1) { + if (MatchScopes.empty()) { + CannotYetSelect(NodeToMatch); + return 0; + } + + // Restore the interpreter state back to the point where the scope was + // formed. + MatchScope &LastScope = MatchScopes.back(); + RecordedNodes.resize(LastScope.NumRecordedNodes); + NodeStack.clear(); + NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end()); + N = NodeStack.back(); + + DEBUG(errs() << " Match failed at index " << MatcherIndex + << " continuing at " << LastScope.FailIndex << "\n"); + + if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) + MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); + MatcherIndex = LastScope.FailIndex; + + InputChain = LastScope.InputChain; + InputFlag = LastScope.InputFlag; + if (!LastScope.HasChainNodesMatched) + ChainNodesMatched.clear(); + if (!LastScope.HasFlagResultNodesMatched) + FlagResultNodesMatched.clear(); + + // Check to see what the offset is at the new MatcherIndex. If it is zero + // we have reached the end of this scope, otherwise we have another child + // in the current scope to try. + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + + // If we have another child in this scope to match, update FailIndex and + // try it. + if (NumToSkip != 0) { + LastScope.FailIndex = MatcherIndex+NumToSkip; + break; + } + + // End of this scope, pop it and try the next child in the containing + // scope. 
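// Illustrative sketch, not part of the patch: the backtracking loop here in a
// nutshell.  Entering an OPC_Scope saves enough interpreter state to retry the next
// alternative; on failure the matcher restores that state and jumps to the scope's
// recorded FailIndex.  Everything below is a toy with invented values; only the
// save/restore shape mirrors the real matcher.
#include <cassert>
#include <vector>

struct ToyScope {
  unsigned FailIndex;      // Where the next alternative starts.
  unsigned NumRecorded;    // Recorded-node count to restore on failure.
};

int main() {
  std::vector<int> Recorded;
  std::vector<ToyScope> Scopes;

  // Enter a scope, then let the first alternative record a node.
  Scopes.push_back({/*FailIndex=*/42, (unsigned)Recorded.size()});
  Recorded.push_back(123);

  // A later check fails: restore the saved state and resume at FailIndex.
  ToyScope Last = Scopes.back();
  Scopes.pop_back();
  Recorded.resize(Last.NumRecorded);
  unsigned ResumeAt = Last.FailIndex;

  assert(Recorded.empty() && ResumeAt == 42);
  return 0;
}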
+ MatchScopes.pop_back(); + } + } +} + + + void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot yet select: "; - N->printrFull(Msg, CurDAG); + + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && + N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && + N->getOpcode() != ISD::INTRINSIC_VOID) { + N->printrFull(Msg, CurDAG); + } else { + bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; + unsigned iid = + cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + if (iid < Intrinsic::num_intrinsics) + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid); + else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) + Msg << "target intrinsic %" << TII->getName(iid); + else + Msg << "unknown intrinsic #" << iid; + } llvm_report_error(Msg.str()); } -void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) { - dbgs() << "Cannot yet select: "; - unsigned iid = - cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() == - MVT::Other))->getZExtValue(); - if (iid < Intrinsic::num_intrinsics) - llvm_report_error("Cannot yet select: intrinsic %" + - Intrinsic::getName((Intrinsic::ID)iid)); - else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo()) - llvm_report_error(Twine("Cannot yet select: target intrinsic %") + - tii->getName(iid)); -} - char SelectionDAGISel::ID = 0; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d74ec7e..8d0d884 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <ctype.h> using namespace llvm; namespace llvm { @@ -540,6 +541,24 @@ TargetLowering::~TargetLowering() { delete &TLOF; } +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, unsigned &NumIntermediates, EVT &RegisterVT, @@ -1423,8 +1442,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::TRUNCATE: { // Simplify the input, using demanded bit information, and compute the known // zero/one bits live out. + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); APInt TruncMask = NewMask; - TruncMask.zext(Op.getOperand(0).getValueSizeInBits()); + TruncMask.zext(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, KnownZero, KnownOne, TLO, Depth+1)) return true; @@ -1435,15 +1456,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // on the known demanded bits. if (Op.getOperand(0).getNode()->hasOneUse()) { SDValue In = Op.getOperand(0); - unsigned InBitWidth = In.getValueSizeInBits(); switch (In.getOpcode()) { default: break; case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. 
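// --- Editor's aside: illustrative sketch only, not part of this commit. ---
// The "shrink SRL" rule that follows rewrites trunc(srl X, C) into
// srl(trunc X, C) when none of the result bits fed from X's upper half are
// demanded.  A standalone check of that identity on concrete 64->32 bit
// values (the constants are arbitrary examples):
#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0xABCD12345678DEF0ULL;
  unsigned Sh = 4;
  uint32_t WideThenTrunc  = (uint32_t)(X >> Sh);   // trunc(srl X, 4)
  uint32_t TruncThenShift = (uint32_t)X >> Sh;     // srl(trunc X, 4)

  // The two can differ only in the top Sh bits of the 32-bit result - the
  // bits described by the HighBits mask computed just below.
  uint32_t HighBits = ~0u << (32 - Sh);
  assert((WideThenTrunc & ~HighBits) == (TruncThenShift & ~HighBits));
  return 0;
}
// ---------------------------------------------------------------------------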
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ - APInt HighBits = APInt::getHighBitsSet(InBitWidth, - InBitWidth - BitWidth); + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); HighBits = HighBits.lshr(ShAmt->getZExtValue()); HighBits.trunc(BitWidth); @@ -1589,7 +1609,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // Fall back to ComputeMaskedBits to catch other known cases. EVT OpVT = Val.getValueType(); - unsigned BitWidth = OpVT.getSizeInBits(); + unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); APInt Mask = APInt::getAllOnesValue(BitWidth); APInt KnownZero, KnownOne; DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne); @@ -1698,7 +1718,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getSrcValue(), Lod->getSrcValueOffset() + bestOffset, - false, NewAlign); + false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -1757,7 +1777,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, break; // todo, be more careful with signed comparisons } } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); EVT ExtDstTy = N0.getValueType(); @@ -1791,22 +1811,21 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC - if (N0.getOpcode() == ISD::SETCC) { + if (N0.getOpcode() == ISD::SETCC && + isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); if (TrueWhenTrue) - return N0; - + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType().isInteger()); return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); } - + if ((N0.getOpcode() == ISD::XOR || - (N0.getOpcode() == ISD::AND && + (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && isa<ConstantSDNode>(N0.getOperand(1)) && @@ -1829,9 +1848,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOperand(0).getOperand(0), N0.getOperand(1)); } + return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } + } else if (N1C->getAPIntValue() == 1 && + (VT == MVT::i1 || + getBooleanContents() == ZeroOrOneBooleanContent)) { + SDValue Op0 = N0; + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + + if ((Op0.getOpcode() == ISD::XOR) && + Op0.getOperand(0).getOpcode() == ISD::SETCC && + Op0.getOperand(1).getOpcode() == ISD::SETCC) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), + Cond); + } else if (Op0.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Op0.getOperand(1)) && + cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. 
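// --- Editor's aside: illustrative sketch only, not part of this commit. ---
// The setcc simplifications in this hunk lean on ordinary 0/1 boolean
// identities: a setcc result compared against 1 is the setcc itself (or its
// inverse for !=), xor of two 0/1 values means "the values differ", and
// (X & 1) == 1 is the same test as (X & 1) != 0.  Exhaustively checked over
// the 0/1 domain:
#include <cassert>

int main() {
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b) {
      assert(((a == 1) ? 1 : 0) == a);        // (setcc) == 1  ->  setcc
      assert(((a != 1) ? 1 : 0) == 1 - a);    // (setcc) != 1  ->  inverted setcc
      assert(((a ^ b) == 1) == (a != b));     // xor of two setcc results
      assert(((a & 1) == 1) == ((a & 1) != 0));
    }
  return 0;
}
// ---------------------------------------------------------------------------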
+ if (Op0.getValueType() != VT) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, VT)); + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 1d9bda4..ce72b2f 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -662,7 +662,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, if (!tii_->isTriviallyReMaterializable(DefMI, AA)) return false; bool SawStore = false; - if (!DefMI->isSafeToMove(tii_, SawStore, AA)) + if (!DefMI->isSafeToMove(tii_, AA, SawStore)) return false; if (TID.getNumDefs() != 1) return false; @@ -702,7 +702,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) { if (!li_->hasInterval(*SR)) continue; - DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx); + const LiveRange *DLR = + li_->getInterval(*SR).getLiveRangeContaining(DefIdx); if (DLR && DLR->valno->getCopy() == CopyMI) DLR->valno->setCopy(0); } @@ -741,9 +742,21 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, NewMI->addOperand(MO); if (MO.isDef() && li_->hasInterval(MO.getReg())) { unsigned Reg = MO.getReg(); - DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx); + const LiveRange *DLR = + li_->getInterval(Reg).getLiveRangeContaining(DefIdx); if (DLR && DLR->valno->getCopy() == CopyMI) DLR->valno->setCopy(0); + // Handle subregs as well + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + const LiveRange *DLR = + li_->getInterval(*SR).getLiveRangeContaining(DefIdx); + if (DLR && DLR->valno->getCopy() == CopyMI) + DLR->valno->setCopy(0); + } + } } } @@ -752,6 +765,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, CopyMI->eraseFromParent(); ReMatCopies.insert(CopyMI); ReMatDefs.insert(DefMI); + DEBUG(dbgs() << "Remat: " << *NewMI); ++NumReMats; return true; } @@ -771,11 +785,16 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, SubIdx = 0; } + // Copy the register use-list before traversing it. We may be adding operands + // and invalidating pointers. + SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), - E = mri_->reg_end(); I != E; ) { - MachineOperand &O = I.getOperand(); - MachineInstr *UseMI = &*I; - ++I; + E = mri_->reg_end(); I != E; ++I) + reglist.push_back(std::make_pair(&*I, I.getOperandNo())); + + for (unsigned N=0; N != reglist.size(); ++N) { + MachineInstr *UseMI = reglist[N].first; + MachineOperand &O = UseMI->getOperand(reglist[N].second); unsigned OldSubIdx = O.getSubReg(); if (DstIsPhys) { unsigned UseDstReg = DstReg; @@ -796,6 +815,19 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, O.setReg(UseDstReg); O.setSubReg(0); + if (OldSubIdx) { + // Def and kill of subregister of a virtual register actually defs and + // kills the whole register. Add imp-defs and imp-kills as needed. 
+ if (O.isDef()) { + if(O.isDead()) + UseMI->addRegisterDead(DstReg, tri_, true); + else + UseMI->addRegisterDefined(DstReg, tri_); + } else if (!O.isUndef() && + (O.isKill() || + UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) + UseMI->addRegisterKilled(DstReg, tri_, true); + } continue; } @@ -1148,12 +1180,14 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg, LiveInterval &SmallInt = li_->getInterval(SmallReg); unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt); unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt); - if (SmallSize > Threshold || LargeSize > Threshold) - if ((float)std::distance(mri_->use_nodbg_begin(SmallReg), - mri_->use_nodbg_end()) / SmallSize < - (float)std::distance(mri_->use_nodbg_begin(LargeReg), - mri_->use_nodbg_end()) / LargeSize) + if (LargeSize > Threshold) { + unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg), + mri_->use_nodbg_end()); + unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg), + mri_->use_nodbg_end()); + if (SmallUses*LargeSize < LargeUses*SmallSize) return false; + } return true; } @@ -1173,6 +1207,8 @@ SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg), E = mri_->reg_end(); I != E; ++I) { MachineOperand &O = I.getOperand(); + if (O.isDebug()) + continue; MachineInstr *MI = &*I; if (MI == CopyMI || JoinedCopies.count(MI)) continue; @@ -1559,7 +1595,10 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (isExtSubReg || DstRC->isASubClass()) && !isWinToJoinCrossClass(LargeReg, SmallReg, allocatableRCRegs_[NewRC].count())) { - DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n"); + DEBUG(dbgs() << "\tSrc/Dest are different register classes: " + << SrcRC->getName() << "/" + << DstRC->getName() << " -> " + << NewRC->getName() << ".\n"); // Allow the coalescer to try again in case either side gets coalesced to // a physical register that's compatible with the other side. e.g. // r1024 = MOV32to32_ r1025 @@ -1680,6 +1719,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { JoinedCopies.insert(CopyMI); + DEBUG(dbgs() << "Trivial!\n"); return true; } @@ -1839,7 +1879,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI, // If the VN has already been computed, just return it. if (ThisValNoAssignments[VN] >= 0) return ThisValNoAssignments[VN]; -// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?"); + assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); // If this val is not a copy from the other val, then it must be a new value // number in the destination. 
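// --- Editor's aside: illustrative sketch only, not part of this commit. ---
// The isWinToJoinCrossClass change above replaces a floating-point use-density
// comparison with an integer cross-multiplication.  For positive sizes the
// two are equivalent:  a/b < c/d  <=>  a*d < c*b.  A tiny standalone check
// (ratioLess and the sample numbers are assumptions for illustration):
#include <cassert>

static bool ratioLess(unsigned a, unsigned b, unsigned c, unsigned d) {
  // a/b < c/d without dividing; widen first so the products cannot overflow.
  return (unsigned long long)a * d < (unsigned long long)c * b;
}

int main() {
  assert(ratioLess(1, 10, 5, 10));     // 0.1 < 0.5
  assert(!ratioLess(5, 10, 1, 10));    // 0.5 is not < 0.1
  return 0;
}
// ---------------------------------------------------------------------------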
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 8d4d1b2..059e8d6 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -44,7 +44,6 @@ namespace { const Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; - Constant *ResumeFn; Constant *BuiltinSetjmpFn; Constant *FrameAddrFn; Constant *LSDAAddrFn; @@ -67,8 +66,8 @@ namespace { } private: - void markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, - Value *CallSite, + void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); + void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch); void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes); bool insertSjLjEHSupport(Function &F); @@ -107,11 +106,6 @@ bool SjLjEHPass::doInitialization(Module &M) { Type::getVoidTy(M.getContext()), PointerType::getUnqual(FunctionContextTy), (Type *)0); - ResumeFn = - M.getOrInsertFunction("_Unwind_SjLj_Resume", - Type::getVoidTy(M.getContext()), - VoidPtrTy, - (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); @@ -123,12 +117,22 @@ bool SjLjEHPass::doInitialization(Module &M) { return true; } +/// insertCallSiteStore - Insert a store of the call-site value to the +/// function context +void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number, + Value *CallSite) { + ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), + Number); + // Insert a store of the call-site number + new StoreInst(CallSiteNoC, CallSite, true, I); // volatile +} + /// markInvokeCallSite - Insert code to mark the call_site for this invoke -void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, +void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch) { ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); + InvokeNo); // The runtime comes back to the dispatcher with the call_site - 1 in // the context. Odd, but there it is. ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), @@ -145,8 +149,11 @@ void SjLjEHPass::markInvokeCallSite(InvokeInst *II, unsigned InvokeNo, } } - // Insert a store of the invoke num before the invoke - new StoreInst(CallSiteNoC, CallSite, true, II); // volatile + // Insert the store of the call site value + insertCallSiteStore(II, InvokeNo, CallSite); + + // Record the call site value for the back end so it stays associated with + // the invoke. CallInst::Create(CallSiteFn, CallSiteNoC, "", II); // Add a switch case to our unwind block. @@ -272,8 +279,8 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { SmallVector<InvokeInst*,16> Invokes; // Look through the terminators of the basic blocks to find invokes, returns - // and unwinds - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + // and unwinds. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { // Remember all return instructions in case we insert an invoke into this // function. @@ -283,6 +290,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { Unwinds.push_back(UI); } + } // If we don't have any invokes or unwinds, there's nothing to do. 
if (Unwinds.empty() && Invokes.empty()) return false; @@ -478,24 +486,21 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { for (unsigned i = 0, e = Invokes.size(); i != e; ++i) markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - // The front end has likely added calls to _Unwind_Resume. We need - // to find those calls and mark the call_site as -1 immediately prior. - // resume is a noreturn function, so any block that has a call to it - // should end in an 'unreachable' instruction with the call immediately - // prior. That's how we'll search. - // ??? There's got to be a better way. this is fugly. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - if ((dyn_cast<UnreachableInst>(BB->getTerminator()))) { - BasicBlock::iterator I = BB->getTerminator(); - // Check the previous instruction and see if it's a resume call - if (I == BB->begin()) continue; - if (CallInst *CI = dyn_cast<CallInst>(--I)) { - if (CI->getCalledFunction() == ResumeFn) { - Value *NegativeOne = Constant::getAllOnesValue(Int32Ty); - new StoreInst(NegativeOne, CallSite, true, I); // volatile - } + // Mark call instructions that aren't nounwind as no-action + // (call_site == -1). Skip the entry block, as prior to then, no function + // context has been created for this function and any unexpected exceptions + // thrown will go directly to the caller's context, which is what we want + // anyway, so no need to do anything here. + for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { + for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + // Ignore calls to the EH builtins (eh.selector, eh.exception) + Constant *Callee = CI->getCalledFunction(); + if (Callee != SelectorFn && Callee != ExceptionFn + && !CI->doesNotThrow()) + insertCallSiteStore(CI, -1, CallSite); } - } + } // Replace all unwinds with a branch to the unwind handler. // ??? Should this ever happen with sjlj exceptions? diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 48bb5af..8a6a727 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const { if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) { // We apparently only care about character arrays. - if (!AT->getElementType()->isInteger(8)) + if (!AT->getElementType()->isIntegerTy(8)) continue; // If an array has more than SSPBufferSize bytes of allocated space, diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 9ab4058..3223e53 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -403,26 +403,45 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, II->RemoveOperand(i); } } - II->RemoveOperand(Idx+1); - II->RemoveOperand(Idx); - } + } else + Idx = 0; + + // If Idx is set, the operands at Idx and Idx+1 must be removed. + // We reuse the location to avoid expensive RemoveOperand calls. + DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); if (LI != SSAUpdateVals.end()) { // This register is defined in the tail block. 
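// --- Editor's aside: illustrative sketch only, not part of this commit. ---
// The UpdateSuccessorsPHIs hunk here overwrites the existing (register, block)
// operand pair in place when a reusable slot is available and only appends new
// operands otherwise, rather than always removing and re-adding them; any
// leftover slot is removed once at the end.  The same update pattern on a flat
// vector (all names and values are illustrative):
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

int main() {
  std::vector<int> ops = {7, 42, 99};                  // slot 1..2 is reusable
  std::size_t Idx = 1;                                 // 0 would mean "no slot"
  std::vector<std::pair<int,int> > incoming = {{10, 20}, {30, 40}};

  for (std::size_t n = 0; n != incoming.size(); ++n) {
    if (Idx != 0) {                                    // reuse the slot once
      ops[Idx] = incoming[n].first;
      ops[Idx + 1] = incoming[n].second;
      Idx = 0;
    } else {                                           // then append the rest
      ops.push_back(incoming[n].first);
      ops.push_back(incoming[n].second);
    }
  }
  assert((ops == std::vector<int>{7, 10, 20, 30, 40}));
  return 0;
}
// ---------------------------------------------------------------------------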
for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = LI->second[j].first; unsigned SrcReg = LI->second[j].second; - II->addOperand(MachineOperand::CreateReg(SrcReg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + if (Idx != 0) { + II->getOperand(Idx).setReg(SrcReg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(SrcReg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } } } else { // Live in tail block, must also be live in predecessors. for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { MachineBasicBlock *SrcBB = TDBBs[j]; - II->addOperand(MachineOperand::CreateReg(Reg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + if (Idx != 0) { + II->getOperand(Idx).setReg(Reg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(Reg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } } } + if (Idx != 0) { + II->RemoveOperand(Idx+1); + II->RemoveOperand(Idx); + } } } } diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index a0fccab..e9e998f 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -150,6 +150,11 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, MBB.insert(I, MI); } +bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const { + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); +} + MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, MachineFunction &MF) const { assert(!Orig->getDesc().isNotDuplicable() && @@ -157,37 +162,6 @@ MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, return MF.CloneMachineInstr(Orig); } -bool -TargetInstrInfoImpl::isIdentical(const MachineInstr *MI, - const MachineInstr *Other, - const MachineRegisterInfo *MRI) const { - if (MI->getOpcode() != Other->getOpcode() || - MI->getNumOperands() != Other->getNumOperands()) - return false; - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - const MachineOperand &OMO = Other->getOperand(i); - if (MO.isReg() && MO.isDef()) { - assert(OMO.isReg() && OMO.isDef()); - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (Reg != OMO.getReg()) - return false; - } else if (MRI->getRegClass(MO.getReg()) != - MRI->getRegClass(OMO.getReg())) - return false; - - continue; - } - - if (!MO.isIdenticalTo(OMO)) - return false; - } - - return true; -} - unsigned TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { unsigned FnSize = 0; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp new file mode 100644 index 0000000..d127f53 --- /dev/null +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -0,0 +1,902 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +using namespace llvm; +using namespace dwarf; + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// +typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; + +TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { + // If we have the section uniquing map, free it. + delete (ELFUniqueMapTy*)UniquingMap; +} + +const MCSection *TargetLoweringObjectFileELF:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, bool IsExplicit) const { + if (UniquingMap == 0) + UniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionELF *&Entry = Map[Section]; + if (Entry) return Entry; + + return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, + getContext()); +} + +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((ELFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + BSSSection = + getELFSection(".bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + + TextSection = + getELFSection(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, + SectionKind::getText()); + + DataSection = + getELFSection(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + ReadOnlySection = + getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + TLSDataSection = + getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); + + TLSBSSSection = + getELFSection(".tbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | + MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); + + DataRelSection = + getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + DataRelLocalSection = + getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRelLocal()); + + DataRelROSection = + getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRel()); + + DataRelROLocalSection = + getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | 
MCSectionELF::SHF_WRITE, + SectionKind::getReadOnlyWithRelLocal()); + + MergeableConst4Section = + getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst4()); + + MergeableConst8Section = + getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst8()); + + MergeableConst16Section = + getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, + SectionKind::getMergeableConst16()); + + StaticCtorSection = + getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + StaticDtorSection = + getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Exception Handling Sections. + + // FIXME: We're emitting LSDA info into a readonly section on ELF, even though + // it contains relocatable pointers. In PIC mode, this is probably a big + // runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); + EHFrameSection = + getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, + SectionKind::getDataRel()); + + // Debug Info Sections. + DwarfAbbrevSection = + getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfInfoSection = + getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLineSection = + getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfFrameSection = + getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfStrSection = + getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfLocSection = + getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfARangesSection = + getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfRangesSection = + getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); +} + + +static SectionKind +getELFKindForNamedSection(StringRef Name, SectionKind K) { + if (Name.empty() || Name[0] != '.') return K; + + // Some lame default implementation based on some magic section names. 
+ if (Name == ".bss" || + Name.startswith(".bss.") || + Name.startswith(".gnu.linkonce.b.") || + Name.startswith(".llvm.linkonce.b.") || + Name == ".sbss" || + Name.startswith(".sbss.") || + Name.startswith(".gnu.linkonce.sb.") || + Name.startswith(".llvm.linkonce.sb.")) + return SectionKind::getBSS(); + + if (Name == ".tdata" || + Name.startswith(".tdata.") || + Name.startswith(".gnu.linkonce.td.") || + Name.startswith(".llvm.linkonce.td.")) + return SectionKind::getThreadData(); + + if (Name == ".tbss" || + Name.startswith(".tbss.") || + Name.startswith(".gnu.linkonce.tb.") || + Name.startswith(".llvm.linkonce.tb.")) + return SectionKind::getThreadBSS(); + + return K; +} + + +static unsigned getELFSectionType(StringRef Name, SectionKind K) { + + if (Name == ".init_array") + return MCSectionELF::SHT_INIT_ARRAY; + + if (Name == ".fini_array") + return MCSectionELF::SHT_FINI_ARRAY; + + if (Name == ".preinit_array") + return MCSectionELF::SHT_PREINIT_ARRAY; + + if (K.isBSS() || K.isThreadBSS()) + return MCSectionELF::SHT_NOBITS; + + return MCSectionELF::SHT_PROGBITS; +} + + +static unsigned +getELFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= MCSectionELF::SHF_ALLOC; + + if (K.isText()) + Flags |= MCSectionELF::SHF_EXECINSTR; + + if (K.isWriteable()) + Flags |= MCSectionELF::SHF_WRITE; + + if (K.isThreadLocal()) + Flags |= MCSectionELF::SHF_TLS; + + // K.isMergeableConst() is left out to honour PR4650 + if (K.isMergeableCString() || K.isMergeableConst4() || + K.isMergeableConst8() || K.isMergeableConst16()) + Flags |= MCSectionELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= MCSectionELF::SHF_STRINGS; + + return Flags; +} + + +const MCSection *TargetLoweringObjectFileELF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + StringRef SectionName = GV->getSection(); + + // Infer section flags from the section name if we can. + Kind = getELFKindForNamedSection(SectionName, Kind); + + return getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind, true); +} + +static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) return ".gnu.linkonce.t."; + if (Kind.isReadOnly()) return ".gnu.linkonce.r."; + + if (Kind.isThreadData()) return ".gnu.linkonce.td."; + if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; + + if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; + if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; + if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".gnu.linkonce.d.rel.ro."; +} + +const MCSection *TargetLoweringObjectFileELF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. 
+ if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { + const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name; + Name.append(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), + getELFSectionFlags(Kind), Kind); + } + + if (Kind.isText()) return TextSection; + + if (Kind.isMergeable1ByteCString() || + Kind.isMergeable2ByteCString() || + Kind.isMergeable4ByteCString()) { + + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! + unsigned Align = + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); + + const char *SizeSpec = ".rodata.str1."; + if (Kind.isMergeable2ByteCString()) + SizeSpec = ".rodata.str2."; + else if (Kind.isMergeable4ByteCString()) + SizeSpec = ".rodata.str4."; + else + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + + + std::string Name = SizeSpec + utostr(Align); + return getELFSection(Name, MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::SHF_MERGE | + MCSectionELF::SHF_STRINGS, + Kind); + } + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + return ReadOnlySection; // .const + } + + if (Kind.isReadOnly()) return ReadOnlySection; + + if (Kind.isThreadData()) return TLSDataSection; + if (Kind.isThreadBSS()) return TLSBSSSection; + + // Note: we claim that common symbols are put in BSSSection, but they are + // really emitted with the magic .comm directive, which creates a symbol table + // entry but not a section. + if (Kind.isBSS() || Kind.isCommon()) return BSSSection; + + if (Kind.isDataNoRel()) return DataSection; + if (Kind.isDataRelLocal()) return DataRelLocalSection; + if (Kind.isDataRel()) return DataRelSection; + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +/// getSectionForConstant - Given a mergeable constant with the +/// specified size and relocation information, return a section that it +/// should be placed in. +const MCSection *TargetLoweringObjectFileELF:: +getSectionForConstant(SectionKind Kind) const { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + if (Kind.isReadOnly()) + return ReadOnlySection; + + if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +const MCExpr *TargetLoweringObjectFileELF:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding) const { + + if (Encoding & dwarf::DW_EH_PE_indirect) { + MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += ".DW.stub"; + + // Add information about the stub reference to ELFMMI so that the stub + // gets emitted by the asmprinter. 
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = ELFMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); +} + +//===----------------------------------------------------------------------===// +// MachO +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; + +TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)UniquingMap; +} + + +const MCSectionMachO *TargetLoweringObjectFileMachO:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) const { + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. + return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, + Reserved2, Kind, getContext()); +} + + +void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((MachOUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + + TextSection // .text + = getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + DataSection // .data + = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + + CStringSection // .cstring + = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); + UStringSection + = getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); + FourByteConstantSection // .literal4 + = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); + EightByteConstantSection // .literal8 + = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); + + // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back + // to using it in -static mode. 
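// --- Editor's aside: illustrative sketch only, not part of this commit. ---
// getMachOSection above uniques sections by their "Segment,Section" pair: the
// first request creates the entry and later requests return the same object,
// so pointer equality identifies the section.  A container-level sketch of
// that memoization (the Section struct and getSection helper are assumptions
// for illustration):
#include <cassert>
#include <map>
#include <string>

struct Section { std::string Segment, Name; };

static Section *getSection(const std::string &Segment,
                           const std::string &Name) {
  static std::map<std::string, Section> Uniquing;    // keyed by "Segment,Name"
  Section &Entry = Uniquing[Segment + "," + Name];
  if (Entry.Name.empty()) {                          // first request: fill it
    Entry.Segment = Segment;
    Entry.Name = Name;
  }
  return &Entry;                                     // stable across inserts
}

int main() {
  assert(getSection("__TEXT", "__text") == getSection("__TEXT", "__text"));
  assert(getSection("__TEXT", "__text") != getSection("__DATA", "__data"));
  return 0;
}
// ---------------------------------------------------------------------------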
+ SixteenByteConstantSection = 0; + if (TM.getRelocationModel() != Reloc::Static && + TM.getTargetData()->getPointerSize() == 32) + SixteenByteConstantSection = // .literal16 + getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); + + ReadOnlySection // .const + = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); + + TextCoalSection + = getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + ConstTextCoalSection + = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataCoalSection + = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, + SectionKind::getText()); + ConstDataSection // .const_data + = getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); + DataCoalSection + = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); + DataCommonSection + = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + DataBSSSection + = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + + + LazySymbolPointerSection + = getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + NonLazySymbolPointerSection + = getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection + = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); + } else { + StaticCtorSection + = getMachOSection("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); + StaticDtorSection + = getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); + } + + // Exception Handling. + LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, + SectionKind::getDataRel()); + EHFrameSection = + getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + + // Debug Information. 
+ DwarfAbbrevSection = + getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfInfoSection = + getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLineSection = + getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfFrameSection = + getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfStrSection = + getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLocSection = + getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfARangesSection = + getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfRangesSection = + getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfDebugInlineSection = + getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileMachO:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' has an invalid section specifier '" + GV->getSection()+ + "': " + ErrorCode + "."); + // Fall back to dropping it into the data section. + return DataSection; + } + + // Get the section. + const MCSectionMachO *S = + getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. + // If the user declared multiple globals with different section flags, we need + // to reject it here. + if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { + // If invalid, report the error with llvm_report_error. + llvm_report_error("Global variable '" + GV->getNameStr() + + "' section type or attributes does not match previous" + " section specifier"); + } + + return S; +} + +const MCSection *TargetLoweringObjectFileMachO:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); + + if (Kind.isText()) + return GV->isWeakForLinker() ? TextCoalSection : TextSection; + + // If this is weak/linkonce, put this in a coalescable section, either in text + // or data depending on if it is writable. + if (GV->isWeakForLinker()) { + if (Kind.isReadOnly()) + return ConstTextCoalSection; + return DataCoalSection; + } + + // FIXME: Alignment check should be handled by section classifier. 
+ if (Kind.isMergeable1ByteCString() && + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + return CStringSection; + + // Do not put 16-bit arrays in the UString section if they have an + // externally visible label, this runs into issues with certain linker + // versions. + if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && + TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + return UStringSection; + + if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + } + + // Otherwise, if it is readonly, but not something we can specially optimize, + // just drop it in .const. + if (Kind.isReadOnly()) + return ReadOnlySection; + + // If this is marked const, put it into a const section. But if the dynamic + // linker needs to write to it, put it in the data segment. + if (Kind.isReadOnlyWithRel()) + return ConstDataSection; + + // Put zero initialized globals with strong external linkage in the + // DATA, __common section with the .zerofill directive. + if (Kind.isBSSExtern()) + return DataCommonSection; + + // Put zero initialized globals with local linkage in __DATA,__bss directive + // with the .zerofill directive (aka .lcomm). + if (Kind.isBSSLocal()) + return DataBSSSection; + + // Otherwise, just drop the variable in the normal data section. + return DataSection; +} + +const MCSection * +TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { + // If this constant requires a relocation, we have to put it in the data + // segment, not in the text segment. + if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + return ConstDataSection; + + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16() && SixteenByteConstantSection) + return SixteenByteConstantSection; + return ReadOnlySection; // .const +} + +/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide +/// not to emit the UsedDirective for some symbols in llvm.used. +// FIXME: REMOVE this (rdar://7071300) +bool TargetLoweringObjectFileMachO:: +shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { + /// On Darwin, internally linked data beginning with "L" or "l" does not have + /// the directive emitted (this occurs in ObjC metadata). + if (!GV) return false; + + // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. + if (GV->hasLocalLinkage() && !isa<Function>(GV)) { + // FIXME: ObjC metadata is currently emitted as internal symbols that have + // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and + // this horrible hack can go away. + SmallString<64> Name; + Mang->getNameWithPrefix(Name, GV, false); + if (Name[0] == 'L' || Name[0] == 'l') + return false; + } + + return true; +} + +const MCExpr *TargetLoweringObjectFileMachO:: +getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding) const { + // The mach-o version of this method defaults to returning a stub reference. 
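// --- Editor's aside: illustrative sketch only, not part of this commit. ---
// The Dwarf reference code that follows treats the EH pointer encoding as a
// bit set: getPersonalityEncoding composes DW_EH_PE_indirect | DW_EH_PE_pcrel
// | DW_EH_PE_sdata4, and the stub path strips the indirect bit before
// recursing.  A standalone sketch of that flag algebra; the enum values below
// mirror the usual DWARF EH constants and should be treated as illustrative:
#include <cassert>

enum {
  DW_EH_PE_sdata4   = 0x0b,
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_indirect = 0x80
};

int main() {
  unsigned Personality = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  assert(Personality & DW_EH_PE_indirect);               // needs a stub load
  unsigned Direct = Personality & ~unsigned(DW_EH_PE_indirect);
  assert(Direct == (DW_EH_PE_pcrel | DW_EH_PE_sdata4));  // indirection removed
  return 0;
}
// ---------------------------------------------------------------------------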
+ + if (Encoding & DW_EH_PE_indirect) { + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); + MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); + if (StubSym == 0) { + Name.clear(); + Mang->getNameWithPrefix(Name, GV, false); + StubSym = getContext().GetOrCreateSymbol(Name.str()); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfReference(Sym, MMI, + Encoding & ~dwarf::DW_EH_PE_indirect); + } + + return TargetLoweringObjectFile:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); +} + +unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const { + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; +} + +unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const { + return DW_EH_PE_pcrel; +} + +unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const { + return DW_EH_PE_pcrel; +} + +unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const { + return DW_EH_PE_absptr; +} + +//===----------------------------------------------------------------------===// +// COFF +//===----------------------------------------------------------------------===// + +typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; + +TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { + delete (COFFUniqueMapTy*)UniquingMap; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { + // Create the map if it doesn't already exist. + if (UniquingMap == 0) + UniquingMap = new MachOUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; + + // Do the lookup, if we have a hit, return it. + const MCSectionCOFF *&Entry = Map[Name]; + if (Entry) return Entry; + + return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +} + +void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + if (UniquingMap != 0) + ((COFFUniqueMapTy*)UniquingMap)->clear(); + TargetLoweringObjectFile::Initialize(Ctx, TM); + TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); + DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); + StaticCtorSection = + getCOFFSection(".ctors", false, SectionKind::getDataRel()); + StaticDtorSection = + getCOFFSection(".dtors", false, SectionKind::getDataRel()); + + // FIXME: We're emitting LSDA info into a readonly section on COFF, even + // though it contains relocatable pointers. In PIC mode, this is probably a + // big runtime hit for C++ apps. Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); + EHFrameSection = + getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); + + // Debug info. + // FIXME: Don't use 'directive' mode here. 
+ DwarfAbbrevSection = + getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", + true, SectionKind::getMetadata()); + DwarfInfoSection = + getCOFFSection("\t.section\t.debug_info,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLineSection = + getCOFFSection("\t.section\t.debug_line,\"dr\"", + true, SectionKind::getMetadata()); + DwarfFrameSection = + getCOFFSection("\t.section\t.debug_frame,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubNamesSection = + getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", + true, SectionKind::getMetadata()); + DwarfPubTypesSection = + getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", + true, SectionKind::getMetadata()); + DwarfStrSection = + getCOFFSection("\t.section\t.debug_str,\"dr\"", + true, SectionKind::getMetadata()); + DwarfLocSection = + getCOFFSection("\t.section\t.debug_loc,\"dr\"", + true, SectionKind::getMetadata()); + DwarfARangesSection = + getCOFFSection("\t.section\t.debug_aranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfRangesSection = + getCOFFSection("\t.section\t.debug_ranges,\"dr\"", + true, SectionKind::getMetadata()); + DwarfMacroInfoSection = + getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", + true, SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileCOFF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + return getCOFFSection(GV->getSection(), false, Kind); +} + +static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text$linkonce"; + if (Kind.isWriteable()) + return ".data$linkonce"; + return ".rdata$linkonce"; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Doesn't support TLS"); + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. + if (GV->isWeakForLinker()) { + const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + Mang->getNameWithPrefix(Name, GV, false); + return getCOFFSection(Name.str(), false, Kind); + } + + if (Kind.isText()) + return getTextSection(); + + return getDataSection(); +} + diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 6c7c1a1..c840b39 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -160,7 +160,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, MachineBasicBlock::iterator OldPos) { // Check if it's safe to move this instruction. bool SeenStore = true; // Be conservative. - if (!MI->isSafeToMove(TII, SeenStore, AA)) + if (!MI->isSafeToMove(TII, AA, SeenStore)) return false; unsigned DefReg = 0; @@ -213,6 +213,9 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB, unsigned NumVisited = 0; for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) { MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. 
return false; ++NumVisited; @@ -451,13 +454,10 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, const TargetInstrInfo *TII, bool &IsCopy, unsigned &DstReg, bool &IsDstPhys) { - MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg); - if (UI == MRI->use_end()) - return 0; - MachineInstr &UseMI = *UI; - if (++UI != MRI->use_end()) - // More than one use. + if (!MRI->hasOneNonDBGUse(Reg)) + // None or more than one use. return 0; + MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg); if (UseMI.getParent() != MBB) return 0; unsigned SrcReg; @@ -923,6 +923,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); mi != me; ) { MachineBasicBlock::iterator nmi = llvm::next(mi); + if (mi->isDebugValue()) { + mi = nmi; + continue; + } const TargetInstrDesc &TID = mi->getDesc(); bool FirstTied = true; @@ -1021,7 +1025,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { // copying it. if (DefMI && DefMI->getDesc().isAsCheapAsAMove() && - DefMI->isSafeToReMat(TII, regB, AA) && + DefMI->isSafeToReMat(TII, AA, regB) && isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 5956b61..ed02696 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -261,19 +261,21 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { void VirtRegMap::print(raw_ostream &OS, const Module* M) const { const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); OS << "********** REGISTER MAP **********\n"; for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) { if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG) OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i]) - << "]\n"; + << "] " << MRI.getRegClass(i)->getName() << "\n"; } for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT) - OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n"; + OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] + << "] " << MRI.getRegClass(i)->getName() << "\n"; OS << '\n'; } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index ce62594..7aa0a91 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -46,7 +46,7 @@ namespace { static cl::opt<RewriterName> RewriterOpt("rewriter", - cl::desc("Rewriter to use: (default: local)"), + cl::desc("Rewriter to use (default=local)"), cl::Prefix, cl::values(clEnumVal(local, "local rewriter"), clEnumVal(trivial, "trivial rewriter"), @@ -62,6 +62,7 @@ VirtRegRewriter::~VirtRegRewriter() {} /// substitutePhysReg - Replace virtual register in MachineOperand with a /// physical register. Do the right thing with the sub-register index. +/// Note that operands may be added, so the MO reference is no longer valid. 
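// --- Editor's aside: illustrative sketch only, not part of this commit. ---
// Both this rewriter and the UpdateRegDefsUses change earlier in this diff
// snapshot a register's use list into a vector of (instruction, operand index)
// pairs before rewriting, because substitutePhysReg below may add operands and
// invalidate the iterators/references being walked.  The same "snapshot, then
// mutate" pattern on a standard container (names and values illustrative):
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<int> uses = {1, 2, 3};
  std::vector<std::size_t> worklist;           // snapshot positions first ...
  for (std::size_t i = 0; i != uses.size(); ++i)
    worklist.push_back(i);
  for (std::size_t n = 0; n != worklist.size(); ++n) {
    uses[worklist[n]] *= 10;                   // ... then mutate freely;
    uses.push_back(0);                         // growth may reallocate, but the
  }                                            // saved indices remain valid.
  assert(uses[0] == 10 && uses[1] == 20 && uses[2] == 30);
  return 0;
}
// ---------------------------------------------------------------------------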
static void substitutePhysReg(MachineOperand &MO, unsigned Reg, const TargetRegisterInfo &TRI) { if (unsigned SubIdx = MO.getSubReg()) { @@ -123,14 +124,15 @@ struct TrivialRewriter : public VirtRegRewriter { continue; unsigned pReg = VRM.getPhys(reg); mri->setPhysRegUsed(pReg); - for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(reg), - regEnd = mri->reg_end(); regItr != regEnd;) { - MachineOperand &mop = regItr.getOperand(); - assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?"); - ++regItr; - substitutePhysReg(mop, pReg, *tri); - changed = true; - } + // Copy the register use-list before traversing it. + SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist; + for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), + E = mri->reg_end(); I != E; ++I) + reglist.push_back(std::make_pair(&*I, I.getOperandNo())); + for (unsigned N=0; N != reglist.size(); ++N) + substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), + pReg, *tri); + changed |= !reglist.empty(); } } @@ -1850,19 +1852,18 @@ private: KilledMIRegs.clear(); for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { unsigned i = VirtUseOps[j]; - MachineOperand &MO = MI.getOperand(i); - unsigned VirtReg = MO.getReg(); + unsigned VirtReg = MI.getOperand(i).getReg(); assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Not a virtual register?"); - unsigned SubIdx = MO.getSubReg(); + unsigned SubIdx = MI.getOperand(i).getSubReg(); if (VRM.isAssignedReg(VirtReg)) { // This virtual register was assigned a physreg! unsigned Phys = VRM.getPhys(VirtReg); RegInfo->setPhysRegUsed(Phys); - if (MO.isDef()) + if (MI.getOperand(i).isDef()) ReusedOperands.markClobbered(Phys); - substitutePhysReg(MO, Phys, *TRI); + substitutePhysReg(MI.getOperand(i), Phys, *TRI); if (VRM.isImplicitlyDefined(VirtReg)) // FIXME: Is this needed? BuildMI(MBB, &MI, MI.getDebugLoc(), @@ -1871,10 +1872,10 @@ private: } // This virtual register is now known to be a spilled value. - if (!MO.isUse()) + if (!MI.getOperand(i).isUse()) continue; // Handle defs in the loop below (handle use&def here though) - bool AvoidReload = MO.isUndef(); + bool AvoidReload = MI.getOperand(i).isUndef(); // Check if it is defined by an implicit def. It should not be spilled. // Note, this is for correctness reason. e.g. 
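The TrivialRewriter change above snapshots the register's use-list into a SmallVector before rewriting, because substitutePhysReg may add operands and therefore invalidate both the operand reference and the use iterator. A standalone sketch of that snapshot-then-mutate pattern, using plain STL containers and invented stand-in types rather than the LLVM classes:

#include <utility>
#include <vector>

struct Instr { std::vector<int> Ops; };   // stand-in for a machine instruction

// Rewriting one operand may append extra operands, reallocating Ops and
// invalidating any reference or iterator held into it.
static void rewriteOperand(Instr &I, unsigned OpNo) {
  I.Ops[OpNo] = -I.Ops[OpNo];
  I.Ops.push_back(0);                     // simulates operands being added
}

// Record (instruction, operand-index) pairs up front, then rewrite from the
// snapshot; the indices stay valid across reallocation where references would not.
static void rewriteUses(std::vector<Instr> &Uses) {
  std::vector<std::pair<Instr*, unsigned> > Snapshot;
  for (size_t i = 0; i != Uses.size(); ++i)
    for (unsigned Op = 0, e = Uses[i].Ops.size(); Op != e; ++Op)
      Snapshot.push_back(std::make_pair(&Uses[i], Op));
  for (size_t i = 0; i != Snapshot.size(); ++i)
    rewriteOperand(*Snapshot[i].first, Snapshot[i].second);
}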
// 8 %reg1024<def> = IMPLICIT_DEF diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp index 7bcd30a..9d07811 100644 --- a/lib/CompilerDriver/Action.cpp +++ b/lib/CompilerDriver/Action.cpp @@ -15,6 +15,7 @@ #include "llvm/CompilerDriver/BuiltinOptions.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SystemUtils.h" #include "llvm/System/Program.h" #include "llvm/System/TimeValue.h" @@ -24,13 +25,23 @@ using namespace llvm; using namespace llvmc; +namespace llvmc { + +extern int Main(int argc, char** argv); +extern const char* ProgramName; + +} + namespace { int ExecuteProgram(const std::string& name, const StrVector& args) { sys::Path prog = sys::Program::FindProgramByName(name); - if (prog.isEmpty()) - throw std::runtime_error("Can't find program '" + name + "'"); + if (prog.isEmpty()) { + prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main); + if (prog.isEmpty()) + throw std::runtime_error("Can't find program '" + name + "'"); + } if (!prog.canExecute()) throw std::runtime_error("Program '" + name + "' is not executable."); diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp index 524607b..7d1c7fe 100644 --- a/lib/CompilerDriver/CompilationGraph.cpp +++ b/lib/CompilerDriver/CompilationGraph.cpp @@ -34,7 +34,8 @@ namespace llvmc { const std::string& LanguageMap::GetLanguage(const sys::Path& File) const { StringRef suf = File.getSuffix(); - LanguageMap::const_iterator Lang = this->find(suf); + LanguageMap::const_iterator Lang = + this->find(suf.empty() ? "*empty*" : suf); if (Lang == this->end()) throw std::runtime_error("File '" + File.str() + "' has unknown suffix '" + suf.str() + '\''); @@ -313,7 +314,7 @@ int CompilationGraph::Build (const sys::Path& TempDir, JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr()); // Are there any files in the join list? - if (JT->JoinListEmpty()) + if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty())) continue; Action CurAction = JT->GenerateAction(CurNode->HasChildren(), diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp index 3a3487a..b5e507d 100644 --- a/lib/CompilerDriver/Main.cpp +++ b/lib/CompilerDriver/Main.cpp @@ -100,7 +100,8 @@ int Main(int argc, char** argv) { ProgramName = argv[0]; cl::ParseCommandLineOptions - (argc, argv, "LLVM Compiler Driver (Work In Progress)", true); + (argc, argv, "LLVM Compiler Driver (Work In Progress)", + /* ReadResponseFiles = */ false); PluginLoader Plugins; Plugins.RunInitialization(langMap, graph); @@ -126,10 +127,6 @@ int Main(int argc, char** argv) { return 0; } - if (InputFilenames.empty()) { - throw std::runtime_error("no input files"); - } - if (Time) { GlobalTimeLog = new std::stringstream; GlobalTimeLog->precision(2); diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp index 9f4ab49..5e558ca 100644 --- a/lib/CompilerDriver/Tool.cpp +++ b/lib/CompilerDriver/Tool.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/System/Path.h" +#include <algorithm> + using namespace llvm; using namespace llvmc; @@ -71,3 +73,22 @@ sys::Path Tool::OutFilename(const sys::Path& In, } return Out; } + +namespace { + template <class A, class B> + bool CompareFirst (std::pair<A,B> p1, std::pair<A,B> p2) { + return std::less<A>()(p1.first, p2.first); + } +} + +StrVector Tool::SortArgs(ArgsVector& Args) const { + StrVector Out; + + // HACK: this won't be needed when we'll migrate away from CommandLine. 
+ std::stable_sort(Args.begin(), Args.end(), &CompareFirst<unsigned, std::string>); + for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) { + Out.push_back(B->second); + } + + return Out; +} diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 3e684e1..b2e2a04 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -339,12 +339,12 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn, } // FALLS THROUGH case 1: - if (!FTy->getParamType(0)->isInteger(32)) { + if (!FTy->getParamType(0)->isIntegerTy(32)) { llvm_report_error("Invalid type for first argument of main() supplied"); } // FALLS THROUGH case 0: - if (!isa<IntegerType>(FTy->getReturnType()) && + if (!FTy->getReturnType()->isIntegerTy() && !FTy->getReturnType()->isVoidTy()) { llvm_report_error("Invalid return type of main() supplied"); } @@ -599,22 +599,22 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { switch (Op0->getType()->getTypeID()) { default: llvm_unreachable("Invalid bitcast operand"); case Type::IntegerTyID: - assert(DestTy->isFloatingPoint() && "invalid bitcast"); + assert(DestTy->isFloatingPointTy() && "invalid bitcast"); if (DestTy->isFloatTy()) GV.FloatVal = GV.IntVal.bitsToFloat(); else if (DestTy->isDoubleTy()) GV.DoubleVal = GV.IntVal.bitsToDouble(); break; case Type::FloatTyID: - assert(DestTy->isInteger(32) && "Invalid bitcast"); + assert(DestTy->isIntegerTy(32) && "Invalid bitcast"); GV.IntVal.floatToBits(GV.FloatVal); break; case Type::DoubleTyID: - assert(DestTy->isInteger(64) && "Invalid bitcast"); + assert(DestTy->isIntegerTy(64) && "Invalid bitcast"); GV.IntVal.doubleToBits(GV.DoubleVal); break; case Type::PointerTyID: - assert(isa<PointerType>(DestTy) && "Invalid bitcast"); + assert(DestTy->isPointerTy() && "Invalid bitcast"); break; // getConstantValue(Op0) above already converted it } return GV; diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 141cb27..c7495d4 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -87,11 +87,11 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) { /*===-- Operations on execution engines -----------------------------------===*/ -LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError) { +LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE, + LLVMModuleRef M, + char **OutError) { std::string Error; - EngineBuilder builder(unwrap(MP)); + EngineBuilder builder(unwrap(M)); builder.setEngineKind(EngineKind::Either) .setErrorStr(&Error); if (ExecutionEngine *EE = builder.create()){ @@ -102,11 +102,11 @@ LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, return 1; } -LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError) { +LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp, + LLVMModuleRef M, + char **OutError) { std::string Error; - EngineBuilder builder(unwrap(MP)); + EngineBuilder builder(unwrap(M)); builder.setEngineKind(EngineKind::Interpreter) .setErrorStr(&Error); if (ExecutionEngine *Interp = builder.create()) { @@ -117,12 +117,12 @@ LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, return 1; } -LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char 
**OutError) { +LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, + LLVMModuleRef M, + unsigned OptLevel, + char **OutError) { std::string Error; - EngineBuilder builder(unwrap(MP)); + EngineBuilder builder(unwrap(M)); builder.setEngineKind(EngineKind::JIT) .setErrorStr(&Error) .setOptLevel((CodeGenOpt::Level)OptLevel); @@ -134,6 +134,35 @@ LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, return 1; } +LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, + LLVMModuleProviderRef MP, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateExecutionEngineForModule(OutEE, + reinterpret_cast<LLVMModuleRef>(MP), + OutError); +} + +LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, + LLVMModuleProviderRef MP, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateInterpreterForModule(OutInterp, + reinterpret_cast<LLVMModuleRef>(MP), + OutError); +} + +LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, + LLVMModuleProviderRef MP, + unsigned OptLevel, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateJITCompilerForModule(OutJIT, + reinterpret_cast<LLVMModuleRef>(MP), + OptLevel, OutError); +} + + void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) { delete unwrap(EE); } @@ -173,17 +202,29 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F)); } +void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){ + unwrap(EE)->addModule(unwrap(M)); +} + void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ - unwrap(EE)->addModule(unwrap(MP)); + /* The module provider is now actually a module. */ + LLVMAddModule(EE, reinterpret_cast<LLVMModuleRef>(MP)); +} + +LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, + LLVMModuleRef *OutMod, char **OutError) { + Module *Mod = unwrap(M); + unwrap(EE)->removeModule(Mod); + *OutMod = wrap(Mod); + return 0; } LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP, LLVMModuleRef *OutMod, char **OutError) { - Module *M = unwrap(MP); - unwrap(EE)->removeModule(M); - *OutMod = wrap(M); - return 0; + /* The module provider is now actually a module. */ + return LLVMRemoveModule(EE, reinterpret_cast<LLVMModuleRef>(MP), OutMod, + OutError); } LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 73f5558..a2aad5a 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -591,7 +591,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, ECStack.pop_back(); if (ECStack.empty()) { // Finished main. Put result into exit code... - if (RetTy && RetTy->isInteger()) { // Nonvoid return type? + if (RetTy && RetTy->isIntegerTy()) { // Nonvoid return type? 
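The bindings above replace the LLVMModuleProviderRef-based constructors with *ForModule variants and keep the old names as shims that simply reinterpret the provider as a module. A minimal, hypothetical use of the new entry point from C++ might look like the following; target/JIT initialization and the module's contents are elided, and the "demo" name and opt level 2 are arbitrary choices for the sketch:

#include "llvm-c/Core.h"
#include "llvm-c/ExecutionEngine.h"
#include <cstdio>

int main() {
  LLVMModuleRef M = LLVMModuleCreateWithName("demo");
  LLVMExecutionEngineRef EE;
  char *Err = 0;
  if (LLVMCreateJITCompilerForModule(&EE, M, 2, &Err)) {
    std::fprintf(stderr, "JIT creation failed: %s\n", Err);
    LLVMDisposeMessage(Err);
    return 1;
  }
  // ... build functions into M and run them via LLVMRunFunction ...
  LLVMDisposeExecutionEngine(EE);   // the engine owns and frees the module
  return 0;
}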
ExitValue = Result; // Capture the exit value of the program } else { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); @@ -761,7 +761,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) { GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, gep_type_iterator E, ExecutionContext &SF) { - assert(isa<PointerType>(Ptr->getType()) && + assert(Ptr->getType()->isPointerTy() && "Cannot getElementOffset of a nonpointer type!"); uint64_t Total = 0; @@ -979,7 +979,7 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPoint() && "Invalid FPToUI instruction"); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); if (SrcTy->getTypeID() == Type::FloatTyID) Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); @@ -993,7 +993,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy = SrcVal->getType(); uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPoint() && "Invalid FPToSI instruction"); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); if (SrcTy->getTypeID() == Type::FloatTyID) Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); @@ -1005,7 +1005,7 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPoint() && "Invalid UIToFP instruction"); + assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); if (DstTy->getTypeID() == Type::FloatTyID) Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); @@ -1017,7 +1017,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPoint() && "Invalid SIToFP instruction"); + assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction"); if (DstTy->getTypeID() == Type::FloatTyID) Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); @@ -1031,7 +1031,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(isa<PointerType>(SrcVal->getType()) && "Invalid PtrToInt instruction"); + assert(SrcVal->getType()->isPointerTy() && "Invalid PtrToInt instruction"); Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal); return Dest; @@ -1040,7 +1040,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy, GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(isa<PointerType>(DstTy) && "Invalid PtrToInt instruction"); + assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction"); uint32_t PtrSize = TD.getPointerSizeInBits(); if (PtrSize != Src.IntVal.getBitWidth()) @@ -1055,27 +1055,27 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy, const Type *SrcTy 
= SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - if (isa<PointerType>(DstTy)) { - assert(isa<PointerType>(SrcTy) && "Invalid BitCast"); + if (DstTy->isPointerTy()) { + assert(SrcTy->isPointerTy() && "Invalid BitCast"); Dest.PointerVal = Src.PointerVal; - } else if (DstTy->isInteger()) { + } else if (DstTy->isIntegerTy()) { if (SrcTy->isFloatTy()) { Dest.IntVal.zext(sizeof(Src.FloatVal) * CHAR_BIT); Dest.IntVal.floatToBits(Src.FloatVal); } else if (SrcTy->isDoubleTy()) { Dest.IntVal.zext(sizeof(Src.DoubleVal) * CHAR_BIT); Dest.IntVal.doubleToBits(Src.DoubleVal); - } else if (SrcTy->isInteger()) { + } else if (SrcTy->isIntegerTy()) { Dest.IntVal = Src.IntVal; } else llvm_unreachable("Invalid BitCast"); } else if (DstTy->isFloatTy()) { - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) Dest.FloatVal = Src.IntVal.bitsToFloat(); else Dest.FloatVal = Src.FloatVal; } else if (DstTy->isDoubleTy()) { - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) Dest.DoubleVal = Src.IntVal.bitsToDouble(); else Dest.DoubleVal = Src.DoubleVal; diff --git a/lib/ExecutionEngine/JIT/Android.mk b/lib/ExecutionEngine/JIT/Android.mk new file mode 100644 index 0000000..1c7e27f --- /dev/null +++ b/lib/ExecutionEngine/JIT/Android.mk @@ -0,0 +1,32 @@ +LOCAL_PATH:= $(call my-dir) + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + Intercept.cpp \ + JIT.cpp \ + JITDebugRegisterer.cpp \ + JITDwarfEmitter.cpp \ + JITEmitter.cpp \ + JITMemoryManager.cpp \ + OProfileJITEventListener.cpp \ + TargetSelect.cpp + +LOCAL_MODULE:= libLLVMJIT + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + JITMemoryManager.cpp + +LOCAL_MODULE:= libLLVMJIT + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 616a66e..dd74d73 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -18,6 +18,7 @@ #include "llvm/Function.h" #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -27,6 +28,7 @@ #include "llvm/Target/TargetJITInfo.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" #include "llvm/System/DynamicLibrary.h" #include "llvm/Config/config.h" @@ -237,9 +239,53 @@ ExecutionEngine *JIT::createJIT(Module *M, } } +namespace { +/// This class supports the global getPointerToNamedFunction(), which allows +/// bugpoint or gdb users to search for a function by name without any context. +class JitPool { + SmallPtrSet<JIT*, 1> JITs; // Optimize for process containing just 1 JIT. 
+ mutable sys::Mutex Lock; +public: + void Add(JIT *jit) { + MutexGuard guard(Lock); + JITs.insert(jit); + } + void Remove(JIT *jit) { + MutexGuard guard(Lock); + JITs.erase(jit); + } + void *getPointerToNamedFunction(const char *Name) const { + MutexGuard guard(Lock); + assert(JITs.size() != 0 && "No Jit registered"); + //search function in every instance of JIT + for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(), + end = JITs.end(); + Jit != end; ++Jit) { + if (Function *F = (*Jit)->FindFunctionNamed(Name)) + return (*Jit)->getPointerToFunction(F); + } + // The function is not available : fallback on the first created (will + // search in symbol of the current program/library) + return (*JITs.begin())->getPointerToNamedFunction(Name); + } +}; +ManagedStatic<JitPool> AllJits; +} +extern "C" { + // getPointerToNamedFunction - This function is used as a global wrapper to + // JIT::getPointerToNamedFunction for the purpose of resolving symbols when + // bugpoint is debugging the JIT. In that scenario, we are loading an .so and + // need to resolve function(s) that are being mis-codegenerated, so we need to + // resolve their addresses at runtime, and this is the way to do it. + void *getPointerToNamedFunction(const char *Name) { + return AllJits->getPointerToNamedFunction(Name); + } +} + JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode) - : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode) { + : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode), + isAlreadyCodeGenerating(false) { setTargetData(TM.getTargetData()); jitstate = new JITState(M); @@ -247,6 +293,9 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Initialize JCE JCE = createEmitter(*this, JMM, TM); + // Register in global list of all JITs. + AllJits->Add(this); + // Add target data MutexGuard locked(lock); FunctionPassManager &PM = jitstate->getPM(locked); @@ -281,6 +330,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, } JIT::~JIT() { + AllJits->Remove(this); delete jitstate; delete JCE; delete &TM; @@ -361,12 +411,12 @@ GenericValue JIT::runFunction(Function *F, // Handle some common cases first. These cases correspond to common `main' // prototypes. - if (RetTy->isInteger(32) || RetTy->isVoidTy()) { + if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { switch (ArgValues.size()) { case 3: - if (FTy->getParamType(0)->isInteger(32) && - isa<PointerType>(FTy->getParamType(1)) && - isa<PointerType>(FTy->getParamType(2))) { + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy() && + FTy->getParamType(2)->isPointerTy()) { int (*PF)(int, char **, const char **) = (int(*)(int, char **, const char **))(intptr_t)FPtr; @@ -379,8 +429,8 @@ GenericValue JIT::runFunction(Function *F, } break; case 2: - if (FTy->getParamType(0)->isInteger(32) && - isa<PointerType>(FTy->getParamType(1))) { + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy()) { int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; // Call the function. 
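The JitPool above exists so the process-wide getPointerToNamedFunction() hook keeps working when several JIT instances are alive at once: each JIT registers itself on construction, deregisters in its destructor, and lookups walk the registered set under a lock, falling back to the first JIT's own symbol search. A rough, generic sketch of that registration pattern (all names invented, not LLVM API):

#include <mutex>
#include <set>
#include <string>

class Engine;                                   // stand-in for a JIT instance

class EnginePool {
  std::set<Engine*> Engines;
  mutable std::mutex Lock;
public:
  void add(Engine *E)    { std::lock_guard<std::mutex> G(Lock); Engines.insert(E); }
  void remove(Engine *E) { std::lock_guard<std::mutex> G(Lock); Engines.erase(E); }

  // Ask every live engine in turn for the named symbol.
  void *lookup(const std::string &Name,
               void *(*query)(Engine*, const std::string&)) const {
    std::lock_guard<std::mutex> G(Lock);
    for (std::set<Engine*>::const_iterator I = Engines.begin(),
         E = Engines.end(); I != E; ++I)
      if (void *P = query(*I, Name))
        return P;
    return 0;                                    // caller applies its fallback
  }
};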
@@ -392,7 +442,7 @@ GenericValue JIT::runFunction(Function *F, break; case 1: if (FTy->getNumParams() == 1 && - FTy->getParamType(0)->isInteger(32)) { + FTy->getParamType(0)->isIntegerTy(32)) { GenericValue rv; int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); @@ -503,8 +553,12 @@ GenericValue JIT::runFunction(Function *F, else ReturnInst::Create(F->getContext(), StubBB); // Just return void. - // Finally, return the value returned by our nullary stub function. - return runFunction(Stub, std::vector<GenericValue>()); + // Finally, call our nullary stub function. + GenericValue Result = runFunction(Stub, std::vector<GenericValue>()); + // Erase it, since no other function can have a reference to it. + Stub->eraseFromParent(); + // And return the result. + return Result; } void JIT::RegisterJITEventListener(JITEventListener *L) { @@ -570,7 +624,6 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { } void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { - static bool isAlreadyCodeGenerating = false; assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); // JIT the function diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index bb8f851..edae719 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -61,6 +61,10 @@ class JIT : public ExecutionEngine { /// should be set to true. Doing so breaks freeMachineCodeForFunction. bool AllocateGVsWithCode; + /// True while the JIT is generating code. Used to assert against recursive + /// entry. + bool isAlreadyCodeGenerating; + JITState *jitstate; JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index c1051a9..946351b 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -522,7 +522,11 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality))); } - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + // LSDA encoding: This must match the encoding used in EmitEHFrame () + if (PointerSize == 4) + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); + else + JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8); JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); } else { JCE->emitULEB128Bytes(1); diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 34a9938..783ebb4 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -37,6 +37,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" @@ -57,7 +58,6 @@ using namespace llvm; STATISTIC(NumBytes, "Number of bytes of machine code compiled"); STATISTIC(NumRelos, "Number of relocations applied"); STATISTIC(NumRetries, "Number of retries with more memory"); -static JIT *TheJIT = 0; // A declaration may stop being a declaration once it's fully read from bitcode. @@ -109,9 +109,13 @@ namespace { /// particular GlobalVariable so that we can reuse them if necessary. 
GlobalToIndirectSymMapTy GlobalToIndirectSymMap; + /// Instance of the JIT this ResolverState serves. + JIT *TheJIT; + public: - JITResolverState() : FunctionToLazyStubMap(this), - FunctionToCallSitesMap(this) {} + JITResolverState(JIT *jit) : FunctionToLazyStubMap(this), + FunctionToCallSitesMap(this), + TheJIT(jit) {} FunctionToLazyStubMapTy& getFunctionToLazyStubMap( const MutexGuard& locked) { @@ -152,53 +156,18 @@ namespace { // was no stub. This function uses the call-site->function map to find a // relevant function, but asserts that only stubs and not other call sites // will be passed in. - Function *EraseStub(const MutexGuard &locked, void *Stub) { - CallSiteToFunctionMapTy::iterator C2F_I = - CallSiteToFunctionMap.find(Stub); - if (C2F_I == CallSiteToFunctionMap.end()) { - // Not a stub. - return NULL; - } - - Function *const F = C2F_I->second; -#ifndef NDEBUG - void *RealStub = FunctionToLazyStubMap.lookup(F); - assert(RealStub == Stub && - "Call-site that wasn't a stub pass in to EraseStub"); -#endif - FunctionToLazyStubMap.erase(F); - CallSiteToFunctionMap.erase(C2F_I); - - // Remove the stub from the function->call-sites map, and remove the whole - // entry from the map if that was the last call site. - FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F); - assert(F2C_I != FunctionToCallSitesMap.end() && - "FunctionToCallSitesMap broken"); - bool Erased = F2C_I->second.erase(Stub); - (void)Erased; - assert(Erased && "FunctionToCallSitesMap broken"); - if (F2C_I->second.empty()) - FunctionToCallSitesMap.erase(F2C_I); - - return F; - } + Function *EraseStub(const MutexGuard &locked, void *Stub); - void EraseAllCallSites(const MutexGuard &locked, Function *F) { + void EraseAllCallSitesFor(const MutexGuard &locked, Function *F) { assert(locked.holds(TheJIT->lock)); - EraseAllCallSitesPrelocked(F); - } - void EraseAllCallSitesPrelocked(Function *F) { - FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); - if (F2C == FunctionToCallSitesMap.end()) - return; - for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(), - E = F2C->second.end(); I != E; ++I) { - bool Erased = CallSiteToFunctionMap.erase(*I); - (void)Erased; - assert(Erased && "Missing call site->function mapping"); - } - FunctionToCallSitesMap.erase(F2C); + EraseAllCallSitesForPrelocked(F); } + void EraseAllCallSitesForPrelocked(Function *F); + + // Erases _all_ call sites regardless of their function. This is used to + // unregister the stub addresses from the StubToResolverMap in + // ~JITResolver(). + void EraseAllCallSitesPrelocked(); }; /// JITResolver - Keep track of, and resolve, call sites for functions that @@ -227,19 +196,16 @@ namespace { JITEmitter &JE; - static JITResolver *TheJITResolver; - public: - explicit JITResolver(JIT &jit, JITEmitter &je) : nextGOTIndex(0), JE(je) { - TheJIT = &jit; + /// Instance of JIT corresponding to this Resolver. + JIT *TheJIT; + public: + explicit JITResolver(JIT &jit, JITEmitter &je) + : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) { LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn); - assert(TheJITResolver == 0 && "Multiple JIT resolvers?"); - TheJITResolver = this; } - ~JITResolver() { - TheJITResolver = 0; - } + ~JITResolver(); /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's /// lazy-compilation stub if it has already been created. 
@@ -260,8 +226,6 @@ namespace { void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, SmallVectorImpl<void*> &Ptrs); - GlobalValue *invalidateStub(void *Stub); - /// getGOTIndexForAddress - Return a new or existing index in the GOT for /// an address. This function only manages slots, it does not manage the /// contents of the slots or the memory associated with the GOT. @@ -273,6 +237,55 @@ namespace { static void *JITCompilerFn(void *Stub); }; + class StubToResolverMapTy { + /// Map a stub address to a specific instance of a JITResolver so that + /// lazily-compiled functions can find the right resolver to use. + /// + /// Guarded by Lock. + std::map<void*, JITResolver*> Map; + + /// Guards Map from concurrent accesses. + mutable sys::Mutex Lock; + + public: + /// Registers a Stub to be resolved by Resolver. + void RegisterStubResolver(void *Stub, JITResolver *Resolver) { + MutexGuard guard(Lock); + Map.insert(std::make_pair(Stub, Resolver)); + } + /// Unregisters the Stub when it's invalidated. + void UnregisterStubResolver(void *Stub) { + MutexGuard guard(Lock); + Map.erase(Stub); + } + /// Returns the JITResolver instance that owns the Stub. + JITResolver *getResolverFromStub(void *Stub) const { + MutexGuard guard(Lock); + // The address given to us for the stub may not be exactly right, it might + // be a little bit after the stub. As such, use upper_bound to find it. + // This is the same trick as in LookupFunctionFromCallSite from + // JITResolverState. + std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub); + assert(I != Map.begin() && "This is not a known stub!"); + --I; + return I->second; + } + /// True if any stubs refer to the given resolver. Only used in an assert(). + /// O(N) + bool ResolverHasStubs(JITResolver* Resolver) const { + MutexGuard guard(Lock); + for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + if (I->second == Resolver) + return true; + } + return false; + } + }; + /// This needs to be static so that a lazy call stub can access it with no + /// context except the address of the stub. + ManagedStatic<StubToResolverMapTy> StubToResolverMap; + /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is /// used to output functions to memory for execution. class JITEmitter : public JITCodeEmitter { @@ -333,9 +346,6 @@ namespace { /// MMI - Machine module info for exception informations MachineModuleInfo* MMI; - // GVSet - a set to keep track of which globals have been seen - SmallPtrSet<const GlobalVariable*, 8> GVSet; - // CurFn - The llvm function being emitted. Only valid during // finishFunction(). const Function *CurFn; @@ -359,22 +369,15 @@ namespace { ValueMap<const Function *, EmittedCode, EmittedFunctionConfig> EmittedFunctions; - // CurFnStubUses - For a given Function, a vector of stubs that it - // references. This facilitates the JIT detecting that a stub is no - // longer used, so that it may be deallocated. - DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses; - - // StubFnRefs - For a given pointer to a stub, a set of Functions which - // reference the stub. When the count of a stub's references drops to zero, - // the stub is unused. 
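StubToResolverMapTy::getResolverFromStub above relies on the usual "greatest key not greater than the query" idiom: the address observed at the call site lies a few bytes past the stub's start, so the code takes upper_bound and steps back one entry. A self-contained sketch of that lookup on a plain std::map, with a hypothetical value type rather than the LLVM classes:

#include <cassert>
#include <map>

// Returns the value mapped from the greatest key that is <= Addr.
// Mirrors the upper_bound-then-decrement trick used for stub lookups.
template <class T>
T findOwningEntry(const std::map<void*, T> &M, void *Addr) {
  typename std::map<void*, T>::const_iterator I = M.upper_bound(Addr);
  assert(I != M.begin() && "address precedes every registered entry");
  --I;                       // now I->first <= Addr < the next key (if any)
  return I->second;
}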
- DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs; - DILocation PrevDLT; + /// Instance of the JIT + JIT *TheJIT; + public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), PrevDLT(NULL) { + EmittedFunctions(this), PrevDLT(NULL), TheJIT(&jit) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); @@ -454,11 +457,6 @@ namespace { /// function body. void deallocateMemForFunction(const Function *F); - /// AddStubToCurrentFunction - Mark the current function being JIT'd as - /// using the stub at the specified address. Allows - /// deallocateMemForFunction to also remove stubs no longer referenced. - void AddStubToCurrentFunction(void *Stub); - virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn); virtual void emitLabel(uint64_t LabelID) { @@ -489,16 +487,86 @@ namespace { bool MayNeedFarStub); void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size); - unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size); - unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size); + unsigned addSizeOfGlobalsInConstantVal( + const Constant *C, unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist); + unsigned addSizeOfGlobalsInInitializer( + const Constant *Init, unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist); unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF); }; } -JITResolver *JITResolver::TheJITResolver = 0; - void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { - JRS->EraseAllCallSitesPrelocked(F); + JRS->EraseAllCallSitesForPrelocked(F); +} + +Function *JITResolverState::EraseStub(const MutexGuard &locked, void *Stub) { + CallSiteToFunctionMapTy::iterator C2F_I = + CallSiteToFunctionMap.find(Stub); + if (C2F_I == CallSiteToFunctionMap.end()) { + // Not a stub. + return NULL; + } + + StubToResolverMap->UnregisterStubResolver(Stub); + + Function *const F = C2F_I->second; +#ifndef NDEBUG + void *RealStub = FunctionToLazyStubMap.lookup(F); + assert(RealStub == Stub && + "Call-site that wasn't a stub passed in to EraseStub"); +#endif + FunctionToLazyStubMap.erase(F); + CallSiteToFunctionMap.erase(C2F_I); + + // Remove the stub from the function->call-sites map, and remove the whole + // entry from the map if that was the last call site. 
+ FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F); + assert(F2C_I != FunctionToCallSitesMap.end() && + "FunctionToCallSitesMap broken"); + bool Erased = F2C_I->second.erase(Stub); + (void)Erased; + assert(Erased && "FunctionToCallSitesMap broken"); + if (F2C_I->second.empty()) + FunctionToCallSitesMap.erase(F2C_I); + + return F; +} + +void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) { + FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F); + if (F2C == FunctionToCallSitesMap.end()) + return; + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(), + E = F2C->second.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(*I); + bool Erased = CallSiteToFunctionMap.erase(*I); + (void)Erased; + assert(Erased && "Missing call site->function mapping"); + } + FunctionToCallSitesMap.erase(F2C); +} + +void JITResolverState::EraseAllCallSitesPrelocked() { + StubToResolverMapTy &S2RMap = *StubToResolverMap; + for (CallSiteToFunctionMapTy::const_iterator + I = CallSiteToFunctionMap.begin(), + E = CallSiteToFunctionMap.end(); I != E; ++I) { + S2RMap.UnregisterStubResolver(I->first); + } + CallSiteToFunctionMap.clear(); + FunctionToCallSitesMap.clear(); +} + +JITResolver::~JITResolver() { + // No need to lock because we're in the destructor, and state isn't shared. + state.EraseAllCallSitesPrelocked(); + assert(!StubToResolverMap->ResolverHasStubs(this) && + "Resolver destroyed with stubs still alive."); } /// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub @@ -551,16 +619,22 @@ void *JITResolver::getLazyFunctionStub(Function *F) { DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" << F->getName() << "'\n"); - // Finally, keep track of the stub-to-Function mapping so that the - // JITCompilerFn knows which function to compile! - state.AddCallSite(locked, Stub, F); - - // If we are JIT'ing non-lazily but need to call a function that does not - // exist yet, add it to the JIT's work list so that we can fill in the stub - // address later. - if (!Actual && !TheJIT->isCompilingLazily()) - if (!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage()) - TheJIT->addPendingFunction(F); + if (TheJIT->isCompilingLazily()) { + // Register this JITResolver as the one corresponding to this call site so + // JITCompilerFn will be able to find it. + StubToResolverMap->RegisterStubResolver(Stub, this); + + // Finally, keep track of the stub-to-Function mapping so that the + // JITCompilerFn knows which function to compile! + state.AddCallSite(locked, Stub, F); + } else if (!Actual) { + // If we are JIT'ing non-lazily but need to call a function that does not + // exist yet, add it to the JIT's work list so that we can fill in the + // stub address later. + assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() && + "'Actual' should have been set above."); + TheJIT->addPendingFunction(F); + } return Stub; } @@ -634,44 +708,12 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs, } } -GlobalValue *JITResolver::invalidateStub(void *Stub) { - MutexGuard locked(TheJIT->lock); - - GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - - // Look up the cheap way first, to see if it's a function stub we are - // invalidating. If so, remove it from both the forward and reverse maps. 
- if (Function *F = state.EraseStub(locked, Stub)) { - return F; - } - - // Otherwise, it might be an indirect symbol stub. Find it and remove it. - for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end(); - i != e; ++i) { - if (i->second != Stub) - continue; - GlobalValue *GV = i->first; - GM.erase(i); - return GV; - } - - // Lastly, check to see if it's in the ExternalFnToStubMap. - for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(), - e = ExternalFnToStubMap.end(); i != e; ++i) { - if (i->second != Stub) - continue; - ExternalFnToStubMap.erase(i); - break; - } - - return 0; -} - /// JITCompilerFn - This function is called when a lazy compilation stub has /// been entered. It looks up which function this stub corresponds to, compiles /// it if necessary, then returns the resultant function pointer. void *JITResolver::JITCompilerFn(void *Stub) { - JITResolver &JR = *TheJITResolver; + JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); + assert(JR && "Unable to find the corresponding JITResolver to the call site"); Function* F = 0; void* ActualPtr = 0; @@ -680,24 +722,24 @@ void *JITResolver::JITCompilerFn(void *Stub) { // Only lock for getting the Function. The call getPointerToFunction made // in this function might trigger function materializing, which requires // JIT lock to be unlocked. - MutexGuard locked(TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. pair<void*, Function*> I = - JR.state.LookupFunctionFromCallSite(locked, Stub); + JR->state.LookupFunctionFromCallSite(locked, Stub); F = I.second; ActualPtr = I.first; } // If we have already code generated the function, just return the address. - void *Result = TheJIT->getPointerToGlobalIfAvailable(F); + void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F); if (!Result) { // Otherwise we don't have it, do lazy compilation now. // If lazy compilation is disabled, emit a useful error message and abort. - if (!TheJIT->isCompilingLazily()) { + if (!JR->TheJIT->isCompilingLazily()) { llvm_report_error("LLVM JIT requested to do lazy compilation of function '" + F->getName() + "' when lazy compiles are disabled!"); } @@ -706,11 +748,11 @@ void *JITResolver::JITCompilerFn(void *Stub) { << "' In stub ptr = " << Stub << " actual ptr = " << ActualPtr << "\n"); - Result = TheJIT->getPointerToFunction(F); + Result = JR->TheJIT->getPointerToFunction(F); } // Reacquire the lock to update the GOT map. - MutexGuard locked(TheJIT->lock); + MutexGuard locked(JR->TheJIT->lock); // We might like to remove the call site from the CallSiteToFunction map, but // we can't do that! Multiple threads could be stuck, waiting to acquire the @@ -725,8 +767,8 @@ void *JITResolver::JITCompilerFn(void *Stub) { // if they see it still using the stub address. // Note: this is done so the Resolver doesn't have to manage GOT memory // Do this without allocating map space if the target isn't using a GOT - if(JR.revGOTMap.find(Stub) != JR.revGOTMap.end()) - JR.revGOTMap[Result] = JR.revGOTMap[Stub]; + if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end()) + JR->revGOTMap[Result] = JR->revGOTMap[Stub]; return Result; } @@ -751,7 +793,6 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, // that we're returning the same address for the function as any previous // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be // close enough to call. 
- AddStubToCurrentFunction(FnStub); return FnStub; } @@ -768,18 +809,10 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, return TheJIT->getPointerToFunction(F); } - // Otherwise, we may need a to emit a stub, and, conservatively, we - // always do so. - void *StubAddr = Resolver.getLazyFunctionStub(F); - - // Add the stub to the current function's list of referenced stubs, so we can - // deallocate them if the current function is ever freed. It's possible to - // return null from getLazyFunctionStub in the case of a weak extern that - // fails to resolve. - if (StubAddr) - AddStubToCurrentFunction(StubAddr); - - return StubAddr; + // Otherwise, we may need a to emit a stub, and, conservatively, we always do + // so. Note that it's possible to return null from getLazyFunctionStub in the + // case of a weak extern that fails to resolve. + return Resolver.getLazyFunctionStub(F); } void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { @@ -787,24 +820,9 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { // resolved address. void *GVAddress = getPointerToGlobal(V, Reference, false); void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress); - - // Add the stub to the current function's list of referenced stubs, so we can - // deallocate them if the current function is ever freed. - AddStubToCurrentFunction(StubAddr); - return StubAddr; } -void JITEmitter::AddStubToCurrentFunction(void *StubAddr) { - assert(CurFn && "Stub added to current function, but current function is 0!"); - - SmallVectorImpl<void*> &StubsUsed = CurFnStubUses[CurFn]; - StubsUsed.push_back(StubAddr); - - SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[StubAddr]; - FnRefs.insert(CurFn); -} - void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (!DL.isUnknown()) { DILocation CurDLT = EmissionDetails.MF->getDILocation(DL); @@ -839,7 +857,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP, return Size; } -static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) { +static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI, JIT *jit) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); if (JT.empty()) return 0; @@ -847,7 +865,7 @@ static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) { for (unsigned i = 0, e = JT.size(); i != e; ++i) NumEntries += JT[i].MBBs.size(); - return NumEntries * MJTI->getEntrySize(*TheJIT->getTargetData()); + return NumEntries * MJTI->getEntrySize(*jit->getTargetData()); } static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) { @@ -876,11 +894,14 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) { } /// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet -/// but are referenced from the constant; put them in GVSet and add their -/// size into the running total Size. - -unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, - unsigned Size) { +/// but are referenced from the constant; put them in SeenGlobals and the +/// Worklist, and add their size into the running total Size. + +unsigned JITEmitter::addSizeOfGlobalsInConstantVal( + const Constant *C, + unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist) { // If its undefined, return the garbage. 
if (isa<UndefValue>(C)) return Size; @@ -902,7 +923,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: { - Size = addSizeOfGlobalsInConstantVal(Op0, Size); + Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist); break; } case Instruction::Add: @@ -918,8 +939,9 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, case Instruction::And: case Instruction::Or: case Instruction::Xor: { - Size = addSizeOfGlobalsInConstantVal(Op0, Size); - Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size); + Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist); + Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size, + SeenGlobals, Worklist); break; } default: { @@ -933,8 +955,10 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, if (C->getType()->getTypeID() == Type::PointerTyID) if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C)) - if (GVSet.insert(GV)) + if (SeenGlobals.insert(GV)) { + Worklist.push_back(GV); Size = addSizeOfGlobal(GV, Size); + } return Size; } @@ -942,15 +966,18 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, /// addSizeOfGLobalsInInitializer - handle any globals that we haven't seen yet /// but are referenced from the given initializer. -unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init, - unsigned Size) { +unsigned JITEmitter::addSizeOfGlobalsInInitializer( + const Constant *Init, + unsigned Size, + SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals, + SmallVectorImpl<const GlobalVariable*> &Worklist) { if (!isa<UndefValue>(Init) && !isa<ConstantVector>(Init) && !isa<ConstantAggregateZero>(Init) && !isa<ConstantArray>(Init) && !isa<ConstantStruct>(Init) && Init->getType()->isFirstClassType()) - Size = addSizeOfGlobalsInConstantVal(Init, Size); + Size = addSizeOfGlobalsInConstantVal(Init, Size, SeenGlobals, Worklist); return Size; } @@ -961,7 +988,7 @@ unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init, unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { unsigned Size = 0; - GVSet.clear(); + SmallPtrSet<const GlobalVariable*, 8> SeenGlobals; for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { @@ -985,7 +1012,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { // assuming the addresses of the new globals in this module // start at 0 (or something) and adjusting them after codegen // complete. Another possibility is to grab a marker bit in GV. - if (GVSet.insert(GV)) + if (SeenGlobals.insert(GV)) // A variable as yet unseen. Add in its size. Size = addSizeOfGlobal(GV, Size); } @@ -994,12 +1021,14 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) { } DEBUG(dbgs() << "JIT: About to look through initializers\n"); // Look for more globals that are referenced only from initializers. - // GVSet.end is computed each time because the set can grow as we go. 
- for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin(); - I != GVSet.end(); I++) { - const GlobalVariable* GV = *I; + SmallVector<const GlobalVariable*, 8> Worklist( + SeenGlobals.begin(), SeenGlobals.end()); + while (!Worklist.empty()) { + const GlobalVariable* GV = Worklist.back(); + Worklist.pop_back(); if (GV->hasInitializer()) - Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size); + Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size, + SeenGlobals, Worklist); } return Size; @@ -1032,7 +1061,7 @@ void JITEmitter::startFunction(MachineFunction &F) { MJTI->getEntryAlignment(*TheJIT->getTargetData())); // Add the jump table size - ActualSize += GetJumpTableSizeInBytes(MJTI); + ActualSize += GetJumpTableSizeInBytes(MJTI, TheJIT); } // Add the alignment for the function @@ -1301,40 +1330,6 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { if (JITEmitDebugInfo) { DR->UnregisterFunction(F); } - - // If the function did not reference any stubs, return. - if (CurFnStubUses.find(F) == CurFnStubUses.end()) - return; - - // For each referenced stub, erase the reference to this function, and then - // erase the list of referenced stubs. - SmallVectorImpl<void *> &StubList = CurFnStubUses[F]; - for (unsigned i = 0, e = StubList.size(); i != e; ++i) { - void *Stub = StubList[i]; - - // If we already invalidated this stub for this function, continue. - if (StubFnRefs.count(Stub) == 0) - continue; - - SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub]; - FnRefs.erase(F); - - // If this function was the last reference to the stub, invalidate the stub - // in the JITResolver. Were there a memory manager deallocateStub routine, - // we could call that at this point too. - if (FnRefs.empty()) { - DEBUG(dbgs() << "\nJIT: Invalidated Stub at [" << Stub << "]\n"); - StubFnRefs.erase(Stub); - - // Invalidate the stub. If it is a GV stub, update the JIT's global - // mapping for that GV to zero. - GlobalValue *GV = Resolver.invalidateStub(Stub); - if (GV) { - TheJIT->updateGlobalMapping(GV, 0); - } - } - } - CurFnStubUses.erase(F); } @@ -1552,19 +1547,6 @@ JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, return new JITEmitter(jit, JMM, tm); } -// getPointerToNamedFunction - This function is used as a global wrapper to -// JIT::getPointerToNamedFunction for the purpose of resolving symbols when -// bugpoint is debugging the JIT. In that scenario, we are loading an .so and -// need to resolve function(s) that are being mis-codegenerated, so we need to -// resolve their addresses at runtime, and this is the way to do it. -extern "C" { - void *getPointerToNamedFunction(const char *Name) { - if (Function *F = TheJIT->FindFunctionNamed(Name)) - return TheJIT->getPointerToFunction(F); - return TheJIT->getPointerToNamedFunction(Name); - } -} - // getPointerToFunctionOrStub - If the specified function has been // code-gen'd, return a pointer to the function. If not, compile it, or use // a stub to implement lazy compilation if available. diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 7f441b0..8487c83 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -159,7 +159,7 @@ static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy, if (DstTy == SrcTy) return false; // If already equal, noop // If we found our opaque type, resolve it now! 
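The GetSizeOfGlobalsInBytes rework above stops iterating over a set that grows during the walk and instead pairs a "seen" set with an explicit worklist: newly discovered globals are pushed onto the worklist and popped until it drains. A small generic sketch of that traversal pattern, using plain STL containers and a hypothetical node type:

#include <set>
#include <vector>

struct Node { std::vector<Node*> Refs; unsigned Size; };

// Sum the sizes of every node reachable from Roots, visiting each node once.
unsigned totalReachableSize(const std::vector<Node*> &Roots) {
  std::set<Node*> Seen(Roots.begin(), Roots.end());
  std::vector<Node*> Worklist(Roots.begin(), Roots.end());
  unsigned Size = 0;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    Size += N->Size;
    for (size_t i = 0; i != N->Refs.size(); ++i)
      if (Seen.insert(N->Refs[i]).second)   // first time we see this node
        Worklist.push_back(N->Refs[i]);
  }
  return Size;
}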
- if (isa<OpaqueType>(DstTy) || isa<OpaqueType>(SrcTy)) + if (DstTy->isOpaqueTy() || SrcTy->isOpaqueTy()) return ResolveTypes(DstTy, SrcTy); // Two types cannot be resolved together if they are of different primitive diff --git a/lib/MC/Android.mk b/lib/MC/Android.mk new file mode 100644 index 0000000..dac5a54 --- /dev/null +++ b/lib/MC/Android.mk @@ -0,0 +1,45 @@ +LOCAL_PATH:= $(call my-dir) + +mc_SRC_FILES := \ + MCAsmInfo.cpp \ + MCAsmInfoCOFF.cpp \ + MCAsmInfoDarwin.cpp \ + MCAsmStreamer.cpp \ + MCAssembler.cpp \ + MCCodeEmitter.cpp \ + MCContext.cpp \ + MCDisassembler.cpp \ + MCExpr.cpp \ + MCInst.cpp \ + MCInstPrinter.cpp \ + MCMachOStreamer.cpp \ + MCNullStreamer.cpp \ + MCSection.cpp \ + MCSectionELF.cpp \ + MCSectionMachO.cpp \ + MCStreamer.cpp \ + MCSymbol.cpp \ + MCValue.cpp \ + TargetAsmBackend.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(mc_SRC_FILES) + +LOCAL_MODULE:= libLLVMMC + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(mc_SRC_FILES) + +LOCAL_MODULE:= libLLVMMC + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 9ead33b..4cf71dc 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -18,4 +18,5 @@ add_llvm_library(LLVMMC MCStreamer.cpp MCSymbol.cpp MCValue.cpp + TargetAsmBackend.cpp ) diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 828377f..1b66900 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include <ctype.h> using namespace llvm; namespace { @@ -134,6 +135,9 @@ public: unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); @@ -513,6 +517,13 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, EmitEOL(); } +void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // Emit with a text fill value. + EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(), + 1, MaxBytesToEmit); +} + void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { // FIXME: Verify that Offset is associated with the current section. 
@@ -552,7 +563,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { MCFixup &F = Fixups[i]; - MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); for (unsigned j = 0; j != Info.TargetSize; ++j) { unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j; assert(Index < Code.size() * 8 && "Invalid offset in fixup!"); @@ -599,7 +610,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { MCFixup &F = Fixups[i]; - MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); + const MCFixupKindInfo &Info = Emitter->getFixupKindInfo(F.getKind()); OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset() << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n"; } @@ -617,6 +628,12 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { raw_ostream &OS = GetCommentOS(); OS << "<MCInst #" << Inst.getOpcode(); + StringRef InstName; + if (InstPrinter) + InstName = InstPrinter->getOpcodeName(Inst.getOpcode()); + if (!InstName.empty()) + OS << ' ' << InstName; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) { OS << "\n "; Inst.getOperand(i).print(OS, &MAI); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index bdc886b..96227db 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -16,11 +16,17 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" + +// FIXME: Gross. +#include "../Target/X86/X86FixupKinds.h" + #include <vector> using namespace llvm; @@ -36,6 +42,8 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, MachObjectWriter &MOW); +static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW); + /// isVirtualSection - Check if this is a section which does not actually exist /// in the object file. static bool isVirtualSection(const MCSection &Section) { @@ -45,6 +53,30 @@ static bool isVirtualSection(const MCSection &Section) { return (Type == MCSectionMachO::S_ZEROFILL); } +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool isFixupKindPCRel(unsigned Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + return true; + } +} + class MachObjectWriter { // See <mach-o/loader.h>. 
enum { @@ -402,13 +434,14 @@ public: uint32_t Word0; uint32_t Word1; }; - void ComputeScatteredRelocationInfo(MCAssembler &Asm, - MCSectionData::Fixup &Fixup, + void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment, + MCAsmFixup &Fixup, const MCValue &Target, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { - uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Address = Fragment.getOffset() + Fixup.Offset; unsigned IsPCRel = 0; + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); unsigned Type = RIT_Vanilla; // See <reloc.h>. @@ -424,11 +457,12 @@ public: Value2 = SD->getFragment()->getAddress() + SD->getOffset(); } - unsigned Log2Size = Log2_32(Fixup.Size); - assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); - // The value which goes in the fixup is current value of the expression. Fixup.FixedValue = Value - Value2 + Target.getConstant(); + if (isFixupKindPCRel(Fixup.Kind)) { + Fixup.FixedValue -= Address; + IsPCRel = 1; + } MachRelocationEntry MRE; MRE.Word0 = ((Address << 0) | @@ -453,8 +487,8 @@ public: } } - void ComputeRelocationInfo(MCAssembler &Asm, - MCSectionData::Fixup &Fixup, + void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment, + MCAsmFixup &Fixup, DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap, std::vector<MachRelocationEntry> &Relocs) { MCValue Target; @@ -466,19 +500,22 @@ public: if (Target.getSymB() || (Target.getSymA() && !Target.getSymA()->isUndefined() && Target.getConstant())) - return ComputeScatteredRelocationInfo(Asm, Fixup, Target, + return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target, SymbolMap, Relocs); // See <reloc.h>. - uint32_t Address = Fixup.Fragment->getOffset() + Fixup.Offset; + uint32_t Address = Fragment.getOffset() + Fixup.Offset; uint32_t Value = 0; unsigned Index = 0; unsigned IsPCRel = 0; + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); unsigned IsExtern = 0; unsigned Type = 0; if (Target.isAbsolute()) { // constant // SymbolNum of 0 indicates the absolute section. + // + // FIXME: When is this generated? Type = RIT_Vanilla; Value = 0; llvm_unreachable("FIXME: Not yet implemented!"); @@ -495,10 +532,11 @@ public: // // FIXME: O(N) Index = 1; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it, ++Index) + MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); + for (; it != ie; ++it, ++Index) if (&*it == SD->getFragment()->getParent()) break; + assert(it != ie && "Unable to find section index!"); Value = SD->getFragment()->getAddress() + SD->getOffset(); } @@ -508,8 +546,10 @@ public: // The value which goes in the fixup is current value of the expression. Fixup.FixedValue = Value + Target.getConstant(); - unsigned Log2Size = Log2_32(Fixup.Size); - assert((1U << Log2Size) == Fixup.Size && "Invalid fixup size!"); + if (isFixupKindPCRel(Fixup.Kind)) { + Fixup.FixedValue -= Address; + IsPCRel = 1; + } // struct relocation_info (8 bytes) MachRelocationEntry MRE; @@ -766,17 +806,20 @@ public: // is written. std::vector<MachRelocationEntry> RelocInfos; uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; - ++it) { + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { MCSectionData &SD = *it; // The assembler writes relocations in the reverse order they were seen. // // FIXME: It is probably more complicated than this. 
unsigned NumRelocsStart = RelocInfos.size(); - for (unsigned i = 0, e = SD.fixup_size(); i != e; ++i) - ComputeRelocationInfo(Asm, SD.getFixups()[e - i - 1], SymbolMap, - RelocInfos); + for (MCSectionData::reverse_iterator it2 = SD.rbegin(), + ie2 = SD.rend(); it2 != ie2; ++it2) + if (MCDataFragment *DF = dyn_cast<MCDataFragment>(&*it2)) + for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i) + ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1], + SymbolMap, RelocInfos); unsigned NumRelocs = RelocInfos.size() - NumRelocsStart; uint64_t SectionStart = SectionDataStart + SD.getAddress(); @@ -871,6 +914,16 @@ public: OS << StringTable.str(); } } + + void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF) { + unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + + // FIXME: Endianness assumption. + assert(Fixup.Offset + Size <= DF.getContents().size() && + "Invalid fixup offset!"); + for (unsigned i = 0; i != Size; ++i) + DF.getContents()[Fixup.Offset + i] = uint8_t(Fixup.FixedValue >> (i * 8)); + } }; /* *** */ @@ -905,35 +958,12 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) Address(~UINT64_C(0)), Size(~UINT64_C(0)), FileSize(~UINT64_C(0)), - LastFixupLookup(~0), HasInstructions(false) { if (A) A->getSectionList().push_back(this); } -const MCSectionData::Fixup * -MCSectionData::LookupFixup(const MCFragment *Fragment, uint64_t Offset) const { - // Use a one level cache to turn the common case of accessing the fixups in - // order into O(1) instead of O(N). - unsigned i = LastFixupLookup, Count = Fixups.size(), End = Fixups.size(); - if (i >= End) - i = 0; - while (Count--) { - const Fixup &F = Fixups[i]; - if (F.Fragment == Fragment && F.Offset == Offset) { - LastFixupLookup = i; - return &F; - } - - ++i; - if (i == End) - i = 0; - } - - return 0; -} - /* *** */ MCSymbolData::MCSymbolData() : Symbol(0) {} @@ -980,31 +1010,10 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { } case MCFragment::FT_Data: + case MCFragment::FT_Fill: F.setFileSize(F.getMaxFileSize()); break; - case MCFragment::FT_Fill: { - MCFillFragment &FF = cast<MCFillFragment>(F); - - F.setFileSize(F.getMaxFileSize()); - - MCValue Target; - if (!FF.getValue().EvaluateAsRelocatable(Target)) - llvm_report_error("expected relocatable expression"); - - // If the fill value is constant, thats it. - if (Target.isAbsolute()) - break; - - // Otherwise, add fixups for the values. - for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - MCSectionData::Fixup Fix(F, i * FF.getValueSize(), - FF.getValue(),FF.getValueSize()); - SD.getFixups().push_back(Fix); - } - break; - } - case MCFragment::FT_Org: { MCOrgFragment &OF = cast<MCOrgFragment>(F); @@ -1051,6 +1060,64 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { SD.setFileSize(Address - SD.getAddress()); } +/// WriteNopData - Write optimal nops to the output file for the \arg Count +/// bytes. This returns the number of bytes written. It may return 0 if +/// the \arg Count is more than the maximum optimal nops. +/// +/// FIXME this is X86 32-bit specific and should move to a better place. 
+static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) { + static const uint8_t Nops[16][16] = { + // nop + {0x90}, + // xchg %ax,%ax + {0x66, 0x90}, + // nopl (%[re]ax) + {0x0f, 0x1f, 0x00}, + // nopl 0(%[re]ax) + {0x0f, 0x1f, 0x40, 0x00}, + // nopl 0(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopl 0L(%[re]ax) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopw 0L(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopw %cs:0L(%[re]ax,%[re]ax,1) + {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + // nopl 0(%[re]ax,%[re]ax,1) + // nopw 0(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + // nopw 0(%[re]ax,%[re]ax,1) + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00}, + // nopw 0(%[re]ax,%[re]ax,1) + // nopl 0L(%[re]ax) */ + {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax) + // nopl 0L(%[re]ax) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00}, + // nopl 0L(%[re]ax) + // nopl 0L(%[re]ax,%[re]ax,1) + {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00} + }; + + if (Count > 15) + return 0; + + for (uint64_t i = 0; i < Count; i++) + MOW.Write8 (uint8_t(Nops[Count - 1][i])); + + return Count; +} + /// WriteFileData - Write the \arg F data to the output file. static void WriteFileData(raw_ostream &OS, const MCFragment &F, MachObjectWriter &MOW) { @@ -1074,6 +1141,14 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, "' is not a divisor of padding size '" + Twine(AF.getFileSize()) + "'"); + // See if we are aligning with nops, and if so do that first to try to fill + // the Count bytes. Then if that did not fill any bytes or there are any + // bytes left to fill use the the Value and ValueSize to fill the rest. + if (AF.getEmitNops()) { + uint64_t NopByteCount = WriteNopData(Count, MOW); + Count -= NopByteCount; + } + for (uint64_t i = 0; i != Count; ++i) { switch (AF.getValueSize()) { default: @@ -1087,39 +1162,30 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, break; } - case MCFragment::FT_Data: + case MCFragment::FT_Data: { + MCDataFragment &DF = cast<MCDataFragment>(F); + + // Apply the fixups. + // + // FIXME: Move elsewhere. + for (MCDataFragment::const_fixup_iterator it = DF.fixup_begin(), + ie = DF.fixup_end(); it != ie; ++it) + MOW.ApplyFixup(*it, DF); + OS << cast<MCDataFragment>(F).getContents().str(); break; + } case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); - - int64_t Value = 0; - - MCValue Target; - if (!FF.getValue().EvaluateAsRelocatable(Target)) - llvm_report_error("expected relocatable expression"); - - if (Target.isAbsolute()) - Value = Target.getConstant(); for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) { - if (!Target.isAbsolute()) { - // Find the fixup. - // - // FIXME: Find a better way to write in the fixes. 
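// Worked examples of the nop table above: WriteNopData(3, MOW) emits
// 0f 1f 00 (nopl (%rax)) and returns 3; WriteNopData(6, MOW) emits
// 66 0f 1f 44 00 00 (nopw 0(%rax,%rax,1)) and returns 6; WriteNopData(20, MOW)
// returns 0, so the FT_Align handler falls back to the fragment's ordinary
// fill value for all of the remaining bytes.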
- const MCSectionData::Fixup *Fixup = - F.getParent()->LookupFixup(&F, i * FF.getValueSize()); - assert(Fixup && "Missing fixup for fill value!"); - Value = Fixup->FixedValue; - } - switch (FF.getValueSize()) { default: assert(0 && "Invalid size!"); - case 1: MOW.Write8 (uint8_t (Value)); break; - case 2: MOW.Write16(uint16_t(Value)); break; - case 4: MOW.Write32(uint32_t(Value)); break; - case 8: MOW.Write64(uint64_t(Value)); break; + case 1: MOW.Write8 (uint8_t (FF.getValue())); break; + case 2: MOW.Write16(uint16_t(FF.getValue())); break; + case 4: MOW.Write32(uint32_t(FF.getValue())); break; + case 8: MOW.Write64(uint64_t(FF.getValue())); break; } } break; @@ -1167,6 +1233,10 @@ static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, } void MCAssembler::Finish() { + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - pre-layout\n--\n"; + dump(); }); + // Layout the concrete sections and fragments. uint64_t Address = 0; MCSectionData *Prev = 0; @@ -1205,9 +1275,149 @@ void MCAssembler::Finish() { Address += SD.getSize(); } + DEBUG_WITH_TYPE("mc-dump", { + llvm::errs() << "assembler backend - post-layout\n--\n"; + dump(); }); + // Write the object file. MachObjectWriter MOW(OS); MOW.WriteObject(*this); OS.flush(); } + + +// Debugging methods + +namespace llvm { + +raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) { + OS << "<MCAsmFixup" << " Offset:" << AF.Offset << " Value:" << *AF.Value + << " Kind:" << AF.Kind << ">"; + return OS; +} + +} + +void MCFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCFragment " << (void*) this << " Offset:" << Offset + << " FileSize:" << FileSize; + + OS << ">"; +} + +void MCAlignFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAlignFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Alignment:" << getAlignment() + << " Value:" << getValue() << " ValueSize:" << getValueSize() + << " MaxBytesToEmit:" << getMaxBytesToEmit() << ">"; +} + +void MCDataFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCDataFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Contents:["; + for (unsigned i = 0, e = getContents().size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << getContents().size() << " bytes)"; + + if (!getFixups().empty()) { + OS << ",\n "; + OS << " Fixups:["; + for (fixup_iterator it = fixup_begin(), ie = fixup_end(); it != ie; ++it) { + if (it != fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + + OS << ">"; +} + +void MCFillFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCFillFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Value:" << getValue() << " ValueSize:" << getValueSize() + << " Count:" << getCount() << ">"; +} + +void MCOrgFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCOrgFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Offset:" << getOffset() << " Value:" << getValue() << ">"; +} + +void MCZeroFillFragment::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCZeroFillFragment "; + this->MCFragment::dump(); + OS << "\n "; + OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">"; +} + +void MCSectionData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSectionData"; + OS << " Alignment:" << getAlignment() << " Address:" << Address + << " Size:" << Size << " FileSize:" << FileSize + << " Fragments:["; + for (iterator it = begin(), ie = 
end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "]>"; +} + +void MCSymbolData::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCSymbolData Symbol:" << getSymbol() + << " Fragment:" << getFragment() << " Offset:" << getOffset() + << " Flags:" << getFlags() << " Index:" << getIndex(); + if (isCommon()) + OS << " (common, size:" << getCommonSize() + << " align: " << getCommonAlignment() << ")"; + if (isExternal()) + OS << " (external)"; + if (isPrivateExtern()) + OS << " (private extern)"; + OS << ">"; +} + +void MCAssembler::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "<MCAssembler\n"; + OS << " Sections:["; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (it != begin()) OS << ",\n "; + it->dump(); + } + OS << "],\n"; + OS << " Symbols:["; + + for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + if (it != symbol_begin()) OS << ",\n "; + it->dump(); + } + OS << "]>\n"; +} diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp index c122763..accb06c 100644 --- a/lib/MC/MCCodeEmitter.cpp +++ b/lib/MC/MCCodeEmitter.cpp @@ -16,3 +16,15 @@ MCCodeEmitter::MCCodeEmitter() { MCCodeEmitter::~MCCodeEmitter() { } + +const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const { + static const MCFixupKindInfo Builtins[] = { + { "FK_Data_1", 0, 8 }, + { "FK_Data_2", 0, 16 }, + { "FK_Data_4", 0, 32 }, + { "FK_Data_8", 0, 64 } + }; + + assert(Kind <= 3 && "Unknown fixup kind"); + return Builtins[Kind]; +} diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index e90c03c..92a7154 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -8,7 +8,14 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCInstPrinter.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; MCInstPrinter::~MCInstPrinter() { } + +/// getOpcodeName - Return the name of the specified opcode enum (e.g. +/// "MOV32ri") or empty if we can't resolve it. +StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { + return ""; +} diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 99a819f..a7a8a5d 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -137,6 +137,8 @@ public: virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); @@ -333,7 +335,22 @@ void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace) { - new MCFillFragment(*AddValueSymbols(Value), Size, 1, CurSectionData); + MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!DF) + DF = new MCDataFragment(CurSectionData); + + // Avoid fixups when possible. + int64_t AbsValue; + if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) { + // FIXME: Endianness assumption. 
+ for (unsigned i = 0; i != Size; ++i) + DF->getContents().push_back(uint8_t(AbsValue >> (i * 8))); + } else { + DF->getFixups().push_back(MCAsmFixup(DF->getContents().size(), + *AddValueSymbols(Value), + MCFixup::getKindForSize(Size))); + DF->getContents().resize(DF->getContents().size() + Size, 0); + } } void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, @@ -342,7 +359,20 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, - CurSectionData); + false /* EmitNops */, CurSectionData); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > CurSectionData->getAlignment()) + CurSectionData->setAlignment(ByteAlignment); +} + +void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + // FIXME the 0x90 is the default x86 1 byte nop opcode. + new MCAlignFragment(ByteAlignment, 0x90, 1, MaxBytesToEmit, + true /* EmitNops */, CurSectionData); // Update the maximum alignment on the current section if necessary. if (ByteAlignment > CurSectionData->getAlignment()) @@ -365,12 +395,23 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { CurSectionData->setHasInstructions(true); - // FIXME: Relocations! SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); Emitter->EncodeInstruction(Inst, VecOS, Fixups); - EmitBytes(VecOS.str(), 0); + VecOS.flush(); + + // Add the fixups and data. + MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); + if (!DF) + DF = new MCDataFragment(CurSectionData); + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + DF->getFixups().push_back(MCAsmFixup(DF->getContents().size()+F.getOffset(), + *F.getValue(), + F.getKind())); + } + DF->getContents().append(Code.begin(), Code.end()); } void MCMachOStreamer::Finish() { diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 46e9ebf..ab61799 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -55,6 +55,9 @@ namespace { unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0) {} + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) {} + virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) {} diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index d5bc396..6185c30 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -146,7 +146,7 @@ bool AsmParser::Run() { // FIXME: Target hook & command line option for initial section. Out.SwitchSection(getMachOSection("__TEXT", "__text", MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, SectionKind())); + 0, SectionKind::getText())); // Prime the lexer. @@ -325,9 +325,17 @@ bool AsmParser::ParseExpression(const MCExpr *&Res) { /// expr ::= primaryexpr /// bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) { + // Parse the expression. Res = 0; - return ParsePrimaryExpr(Res, EndLoc) || - ParseBinOpRHS(1, Res, EndLoc); + if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc)) + return true; + + // Try to constant fold it up front, if possible. 
+ int64_t Value; + if (Res->EvaluateAsAbsolute(Value)) + Res = MCConstantExpr::Create(Value, getContext()); + + return false; } bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { @@ -906,8 +914,10 @@ bool AsmParser::ParseDirectiveDarwinSection() { return Error(Loc, ErrorStr.c_str()); // FIXME: Arch specific. + bool isText = Segment == "__TEXT"; // FIXME: Hack. Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - SectionKind())); + isText ? SectionKind::getText() + : SectionKind::getDataRel())); return false; } @@ -921,8 +931,10 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, Lex(); // FIXME: Arch specific. + bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize, - SectionKind())); + isText ? SectionKind::getText() + : SectionKind::getDataRel())); // Set the implicit alignment, if any. // @@ -1229,8 +1241,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { } } - // FIXME: Target specific behavior about how the "extra" bytes are filled. - Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); + // FIXME: hard code the parser to use EmitCodeAlignment for text when using + // the TextAlignFillValue. + if(Out.getCurrentSection()->getKind().isText() && + Lexer.getMAI().getTextAlignFillValue() == FillExpr) + Out.EmitCodeAlignment(Alignment, MaxBytesToFill); + else + // FIXME: Target specific behavior about how the "extra" bytes are filled. + Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); return false; } @@ -1354,7 +1372,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { if (IsLocal) { Out.EmitZerofill(getMachOSection("__DATA", "__bss", MCSectionMachO::S_ZEROFILL, 0, - SectionKind()), + SectionKind::getBSS()), Sym, Size, 1 << Pow2Alignment); return false; } @@ -1390,7 +1408,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { // Create the zerofill section but no symbol Out.EmitZerofill(getMachOSection(Segment, Section, MCSectionMachO::S_ZEROFILL, 0, - SectionKind())); + SectionKind::getBSS())); return false; } @@ -1448,7 +1466,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() { // FIXME: Arch specific. Out.EmitZerofill(getMachOSection(Segment, Section, MCSectionMachO::S_ZEROFILL, 0, - SectionKind()), + SectionKind::getBSS()), Sym, Size, 1 << Pow2Alignment); return false; diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index 6cc67a2..370aad1 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/raw_ostream.h" +#include <ctype.h> using namespace llvm; /// SectionTypeDescriptors - These are strings that describe the various section diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/TargetAsmBackend.cpp new file mode 100644 index 0000000..918d272 --- /dev/null +++ b/lib/MC/TargetAsmBackend.cpp @@ -0,0 +1,19 @@ +//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +TargetAsmBackend::TargetAsmBackend(const Target &T) + : TheTarget(T) +{ +} + +TargetAsmBackend::~TargetAsmBackend() { +} diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 1e6d22f..8f860a6 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include <limits.h> #include <cstring> using namespace llvm; @@ -625,17 +626,58 @@ APFloat::copySignificand(const APFloat &rhs) /* Make this number a NaN, with an arbitrary but deterministic value for the significand. If double or longer, this is a signalling NaN, which may not be ideal. If float, this is QNaN(0). */ -void -APFloat::makeNaN(unsigned type) +void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { category = fcNaN; - // FIXME: Add double and long double support for QNaN(0). - if (semantics->precision == 24 && semantics->maxExponent == 127) { - type |= 0x7fc00000U; - type &= ~0x80000000U; - } else - type = ~0U; - APInt::tcSet(significandParts(), type, partCount()); + sign = Negative; + + integerPart *significand = significandParts(); + unsigned numParts = partCount(); + + // Set the significand bits to the fill. + if (!fill || fill->getNumWords() < numParts) + APInt::tcSet(significand, 0, numParts); + if (fill) { + APInt::tcAssign(significand, fill->getRawData(), + std::min(fill->getNumWords(), numParts)); + + // Zero out the excess bits of the significand. + unsigned bitsToPreserve = semantics->precision - 1; + unsigned part = bitsToPreserve / 64; + bitsToPreserve %= 64; + significand[part] &= ((1ULL << bitsToPreserve) - 1); + for (part++; part != numParts; ++part) + significand[part] = 0; + } + + unsigned QNaNBit = semantics->precision - 2; + + if (SNaN) { + // We always have to clear the QNaN bit to make it an SNaN. + APInt::tcClearBit(significand, QNaNBit); + + // If there are no bits set in the payload, we have to set + // *something* to make it a NaN instead of an infinity; + // conventionally, this is the next bit down from the QNaN bit. + if (APInt::tcIsZero(significand, numParts)) + APInt::tcSetBit(significand, QNaNBit - 1); + } else { + // We always have to set the QNaN bit to make it a QNaN. + APInt::tcSetBit(significand, QNaNBit); + } + + // For x87 extended precision, we want to make a NaN, not a + // pseudo-NaN. Maybe we should expose the ability to make + // pseudo-NaNs? + if (semantics == &APFloat::x87DoubleExtended) + APInt::tcSetBit(significand, QNaNBit + 1); +} + +APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, + const APInt *fill) { + APFloat value(Sem, uninitialized); + value.makeNaN(SNaN, Negative, fill); + return value; } APFloat & @@ -700,9 +742,14 @@ APFloat::APFloat(const fltSemantics &ourSemantics) { sign = false; } +APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { + assertArithmeticOK(ourSemantics); + // Allocates storage if necessary but does not initialize it. 
+ initialize(&ourSemantics); +} APFloat::APFloat(const fltSemantics &ourSemantics, - fltCategory ourCategory, bool negative, unsigned type) + fltCategory ourCategory, bool negative) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); @@ -711,7 +758,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics, if (category == fcNormal) category = fcZero; else if (ourCategory == fcNaN) - makeNaN(type); + makeNaN(); } APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text) @@ -2345,11 +2392,24 @@ APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mo if (decDigitValue(*D.firstSigDigit) >= 10U) { category = fcZero; fs = opOK; - } else if ((D.normalizedExponent + 1) * 28738 - <= 8651 * (semantics->minExponent - (int) semantics->precision)) { + + /* Check whether the normalized exponent is high enough to overflow + max during the log-rebasing in the max-exponent check below. */ + } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { + fs = handleOverflow(rounding_mode); + + /* If it wasn't, then it also wasn't high enough to overflow max + during the log-rebasing in the min-exponent check. Check that it + won't overflow min in either check, then perform the min-exponent + check. */ + } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || + (D.normalizedExponent + 1) * 28738 <= + 8651 * (semantics->minExponent - (int) semantics->precision)) { /* Underflow to zero and round. */ zeroSignificand(); fs = normalize(rounding_mode, lfLessThanHalf); + + /* We can finally safely perform the max-exponent check. */ } else if ((D.normalizedExponent - 1) * 42039 >= 12655 * semantics->maxExponent) { /* Overflow and round. */ @@ -3306,7 +3366,7 @@ namespace { void APFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) { + unsigned FormatMaxPadding) const { switch (category) { case fcInfinity: if (isNegative()) diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 3bce3f3..6a6384a 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -2344,13 +2344,21 @@ APInt::tcExtractBit(const integerPart *parts, unsigned int bit) & ((integerPart) 1 << bit % integerPartWidth)) != 0; } -/* Set the given bit of a bignum. */ +/* Set the given bit of a bignum. */ void APInt::tcSetBit(integerPart *parts, unsigned int bit) { parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth); } +/* Clears the given bit of a bignum. */ +void +APInt::tcClearBit(integerPart *parts, unsigned int bit) +{ + parts[bit / integerPartWidth] &= + ~((integerPart) 1 << (bit % integerPartWidth)); +} + /* Returns the bit number of the least significant set bit of a number. If the input number has no bits set -1U is returned. 
*/ unsigned int diff --git a/lib/Support/Android.mk b/lib/Support/Android.mk new file mode 100644 index 0000000..e972753 --- /dev/null +++ b/lib/Support/Android.mk @@ -0,0 +1,72 @@ +LOCAL_PATH:= $(call my-dir) + +support_SRC_FILES := \ + APFloat.cpp \ + APInt.cpp \ + APSInt.cpp \ + Allocator.cpp \ + CommandLine.cpp \ + ConstantRange.cpp \ + Debug.cpp \ + DeltaAlgorithm.cpp \ + Dwarf.cpp \ + ErrorHandling.cpp \ + FileUtilities.cpp \ + FoldingSet.cpp \ + FormattedStream.cpp \ + GraphWriter.cpp \ + IsInf.cpp \ + IsNAN.cpp \ + ManagedStatic.cpp \ + MemoryBuffer.cpp \ + MemoryObject.cpp \ + PluginLoader.cpp \ + PrettyStackTrace.cpp \ + Regex.cpp \ + SlowOperationInformer.cpp \ + SmallPtrSet.cpp \ + SmallVector.cpp \ + SourceMgr.cpp \ + Statistic.cpp \ + StringExtras.cpp \ + StringMap.cpp \ + StringPool.cpp \ + StringRef.cpp \ + SystemUtils.cpp \ + TargetRegistry.cpp \ + Timer.cpp \ + Triple.cpp \ + Twine.cpp \ + circular_raw_ostream.cpp \ + raw_os_ostream.cpp \ + raw_ostream.cpp \ + regcomp.c \ + regerror.c \ + regexec.c \ + regfree.c \ + regstrlcpy.c + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +# FIXME: This only requires RTTI because tblgen uses it. Fix that. +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(support_SRC_FILES) + +LOCAL_MODULE:= libLLVMSupport + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(support_SRC_FILES) + +LOCAL_MODULE:= libLLVMSupport + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 961dc1f..2ab4103 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -650,7 +650,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, if (Handler == 0) { if (SinkOpts.empty()) { errs() << ProgramName << ": Unknown command line argument '" - << argv[i] << "'. Try: '" << argv[0] << " --help'\n"; + << argv[i] << "'. 
Try: '" << argv[0] << " -help'\n"; ErrorParsing = true; } else { for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(), @@ -673,7 +673,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, errs() << ProgramName << ": Not enough positional command line arguments specified!\n" << "Must specify at least " << NumPositionalRequired - << " positional arguments: See: " << argv[0] << " --help\n"; + << " positional arguments: See: " << argv[0] << " -help\n"; ErrorParsing = true; } else if (!HasUnlimitedPositionals @@ -681,7 +681,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, errs() << ProgramName << ": Too many positional arguments specified!\n" << "Can specify at most " << PositionalOpts.size() - << " positional arguments: See: " << argv[0] << " --help\n"; + << " positional arguments: See: " << argv[0] << " -help\n"; ErrorParsing = true; } else if (ConsumeAfterOpt == 0) { @@ -1029,7 +1029,7 @@ void generic_parser_base::printOptionInfo(const Option &O, //===----------------------------------------------------------------------===// -// --help and --help-hidden option implementation +// -help and -help-hidden option implementation // static int OptNameCompare(const void *LHS, const void *RHS) { @@ -1134,7 +1134,7 @@ static HelpPrinter NormalPrinter(false); static HelpPrinter HiddenPrinter(true); static cl::opt<HelpPrinter, true, parser<bool> > -HOp("help", cl::desc("Display available options (--help-hidden for more)"), +HOp("help", cl::desc("Display available options (-help-hidden for more)"), cl::location(NormalPrinter), cl::ValueDisallowed); static cl::opt<HelpPrinter, true, parser<bool> > @@ -1222,8 +1222,8 @@ void cl::PrintHelpMessage() { // NormalPrinter variable is a HelpPrinter and the help gets printed when // its operator= is invoked. That's because the "normal" usages of the // help printer is to be assigned true/false depending on whether the - // --help option was given or not. Since we're circumventing that we have - // to make it look like --help was given, so we assign true. + // -help option was given or not. Since we're circumventing that we have + // to make it look like -help was given, so we assign true. NormalPrinter = true; } diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 9ab3666..c72b5a1 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -56,15 +56,14 @@ void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) { /// PadToColumn - Align the output to some column number. /// /// \param NewCol - The column to move to. -/// \param MinPad - The minimum space to give after the most recent -/// I/O, even if the current column + minpad > newcol. /// -void formatted_raw_ostream::PadToColumn(unsigned NewCol) { +formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { // Figure out what's in the buffer and add it to the column count. ComputeColumn(getBufferStart(), GetNumBytesInBuffer()); // Output spaces until we reach the desired column. 
indent(std::max(int(NewCol - ColumnScanned), 1)); + return *this; } void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index c8bca6e..ec84f9b 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -137,7 +137,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, args.clear(); args.push_back(gv.c_str()); args.push_back(PSFilename.c_str()); - args.push_back("-spartan"); + args.push_back("--spartan"); args.push_back(0); ErrMsg.clear(); diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 9253b01..eb046d0 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -174,7 +174,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. #endif - int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags); + SmallString<256> PathBuf(Filename.begin(), Filename.end()); + int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags); if (FD == -1) { if (ErrStr) *ErrStr = strerror(errno); return 0; diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 618ca05..a7631de 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -90,3 +90,79 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){ return true; } + +std::string Regex::sub(StringRef Repl, StringRef String, + std::string *Error) { + SmallVector<StringRef, 8> Matches; + + // Reset error, if given. + if (Error && !Error->empty()) *Error = ""; + + // Return the input if there was no match. + if (!match(String, &Matches)) + return String; + + // Otherwise splice in the replacement string, starting with the prefix before + // the match. + std::string Res(String.begin(), Matches[0].begin()); + + // Then the replacement string, honoring possible substitutions. + while (!Repl.empty()) { + // Skip to the next escape. + std::pair<StringRef, StringRef> Split = Repl.split('\\'); + + // Add the skipped substring. + Res += Split.first; + + // Check for terminimation and trailing backslash. + if (Split.second.empty()) { + if (Repl.size() != Split.first.size() && + Error && Error->empty()) + *Error = "replacement string contained trailing backslash"; + break; + } + + // Otherwise update the replacement string and interpret escapes. + Repl = Split.second; + + // FIXME: We should have a StringExtras function for mapping C99 escapes. + switch (Repl[0]) { + // Treat all unrecognized characters as self-quoting. + default: + Res += Repl[0]; + Repl = Repl.substr(1); + break; + + // Single character escapes. + case 't': + Res += '\t'; + Repl = Repl.substr(1); + break; + case 'n': + Res += '\n'; + Repl = Repl.substr(1); + break; + + // Decimal escapes are backreferences. + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + // Extract the backreference number. + StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789")); + Repl = Repl.substr(Ref.size()); + + unsigned RefValue; + if (!Ref.getAsInteger(10, RefValue) && + RefValue < Matches.size()) + Res += Matches[RefValue]; + else if (Error && Error->empty()) + *Error = "invalid backreference string '" + Ref.str() + "'"; + break; + } + } + } + + // And finally the suffix. 
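  // A usage sketch for the sub() method added here, with a hypothetical
  // pattern and input:
  //   Regex R("([a-z]+) ([a-z]+)");
  //   std::string S = R.sub("\\2, \\1", "hello world");  // yields "world, hello"
  // In the replacement string, \t and \n are the recognized character escapes,
  // \0 through \9 are backreferences into the match, and any other escaped
  // character stands for itself.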
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end()); + + return Res; +} diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ae2640b..2b262dc 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/APInt.h" using namespace llvm; @@ -172,23 +173,28 @@ size_t StringRef::count(StringRef Str) const { return Count; } +static unsigned GetAutoSenseRadix(StringRef &Str) { + if (Str.startswith("0x")) { + Str = Str.substr(2); + return 16; + } else if (Str.startswith("0b")) { + Str = Str.substr(2); + return 2; + } else if (Str.startswith("0")) { + return 8; + } else { + return 10; + } +} + + /// GetAsUnsignedInteger - Workhorse method that converts a integer character /// sequence of radix up to 36 to an unsigned long long value. static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long &Result) { // Autosense radix if not specified. - if (Radix == 0) { - if (Str.startswith("0x")) { - Str = Str.substr(2); - Radix = 16; - } else if (Str.startswith("0b")) { - Str = Str.substr(2); - Radix = 2; - } else if (Str.startswith("0")) - Radix = 8; - else - Radix = 10; - } + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); // Empty strings (after the radix autosense) are invalid. if (Str.empty()) return true; @@ -272,3 +278,78 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { Result = Val; return false; } + +bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { + StringRef Str = *this; + + // Autosense radix if not specified. + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); + + assert(Radix > 1 && Radix <= 36); + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Skip leading zeroes. This can be a significant improvement if + // it means we don't need > 64 bits. + while (!Str.empty() && Str.front() == '0') + Str = Str.substr(1); + + // If it was nothing but zeroes.... + if (Str.empty()) { + Result = APInt(64, 0); + return false; + } + + // (Over-)estimate the required number of bits. + unsigned Log2Radix = 0; + while ((1U << Log2Radix) < Radix) Log2Radix++; + bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix); + + unsigned BitWidth = Log2Radix * Str.size(); + if (BitWidth < Result.getBitWidth()) + BitWidth = Result.getBitWidth(); // don't shrink the result + else + Result.zext(BitWidth); + + APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix + if (!IsPowerOf2Radix) { + // These must have the same bit-width as Result. + RadixAP = APInt(BitWidth, Radix); + CharAP = APInt(BitWidth, 0); + } + + // Parse all the bytes of the string given this radix. + Result = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= '0' && Str[0] <= '9') + CharVal = Str[0]-'0'; + else if (Str[0] >= 'a' && Str[0] <= 'z') + CharVal = Str[0]-'a'+10; + else if (Str[0] >= 'A' && Str[0] <= 'Z') + CharVal = Str[0]-'A'+10; + else + return true; + + // If the parsed value is larger than the integer radix, the string is + // invalid. + if (CharVal >= Radix) + return true; + + // Add in this character. 
+ if (IsPowerOf2Radix) { + Result <<= Log2Radix; + Result |= CharVal; + } else { + Result *= RadixAP; + CharAP = CharVal; + Result += CharAP; + } + + Str = Str.substr(1); + } + + return false; +} diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 5a76184..61bf0a7 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -40,6 +40,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case x86: return "i386"; case x86_64: return "x86_64"; case xcore: return "xcore"; + case mblaze: return "mblaze"; } return "<invalid>"; @@ -62,6 +63,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case ppc64: case ppc: return "ppc"; + case mblaze: return "mblaze"; + case sparcv9: case sparc: return "sparc"; @@ -127,6 +130,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return ppc64; if (Name == "ppc") return ppc; + if (Name == "mblaze") + return mblaze; if (Name == "sparc") return sparc; if (Name == "sparcv9") @@ -198,6 +203,8 @@ const char *Triple::getArchNameForAssembler() { return "ppc"; if (Str == "powerpc64") return "ppc64"; + if (Str == "mblaze" || Str == "microblaze") + return "mblaze"; if (Str == "arm") return "arm"; if (Str == "armv4t" || Str == "thumbv4t") @@ -234,6 +241,8 @@ void Triple::Parse() const { Arch = ppc; else if ((ArchName == "powerpc64") || (ArchName == "ppu")) Arch = ppc64; + else if (ArchName == "mblaze") + Arch = mblaze; else if (ArchName == "arm" || ArchName.startswith("armv") || ArchName == "xscale") diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index af6dc7c..071c924 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -368,6 +368,7 @@ void format_object_base::home() { /// if no error occurred. raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, unsigned Flags) : pos(0) { + assert(Filename != 0 && "Filename is null"); // Verify that we don't have both "append" and "excl". assert((!(Flags & F_Excl) || !(Flags & F_Append)) && "Cannot specify both 'excl' and 'append' file creation flags!"); @@ -574,12 +575,18 @@ void raw_svector_ostream::resync() { } void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { - assert(Ptr == OS.end() && OS.size() + Size <= OS.capacity() && - "Invalid write_impl() call!"); - - // We don't need to copy the bytes, just commit the bytes to the - // SmallVector. - OS.set_size(OS.size() + Size); + // If we're writing bytes from the end of the buffer into the smallvector, we + // don't need to copy the bytes, just commit the bytes because they are + // already in the right place. + if (Ptr == OS.end()) { + assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!"); + OS.set_size(OS.size() + Size); + } else { + assert(GetNumBytesInBuffer() == 0 && + "Should be writing from buffer if some bytes in it"); + // Otherwise, do copy the bytes. + OS.append(Ptr, Ptr+Size); + } // Grow the vector if necessary. 
if (OS.capacity() - OS.size() < 64) diff --git a/lib/System/Android.mk b/lib/System/Android.mk new file mode 100644 index 0000000..3f11fc7 --- /dev/null +++ b/lib/System/Android.mk @@ -0,0 +1,46 @@ +LOCAL_PATH:= $(call my-dir) + +system_SRC_FILES := \ + Alarm.cpp \ + Atomic.cpp \ + Disassembler.cpp \ + Errno.cpp \ + Host.cpp \ + IncludeFile.cpp \ + Memory.cpp \ + Mutex.cpp \ + Path.cpp \ + Process.cpp \ + Program.cpp \ + RWMutex.cpp \ + Signals.cpp \ + ThreadLocal.cpp \ + Threading.cpp \ + TimeValue.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(system_SRC_FILES) +LOCAL_CFLAGS += -march=i686 + +LOCAL_MODULE:= libLLVMSystem + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(system_SRC_FILES) + +LOCAL_MODULE:= libLLVMSystem + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/System/Unix/Host.inc b/lib/System/Unix/Host.inc index c76d6a4..5b11876 100644 --- a/lib/System/Unix/Host.inc +++ b/lib/System/Unix/Host.inc @@ -21,6 +21,7 @@ #include "Unix.h" #include <sys/utsname.h> #include <string> +#include <ctype.h> using namespace llvm; diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index e8c2806..c10498a 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -126,7 +126,7 @@ static void TimeOutHandler(int Sig) { static void SetMemoryLimits (unsigned size) { -#if HAVE_SYS_RESOURCE_H +#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT struct rlimit r; __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576; diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index 676e1e5..c8ec68a 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -52,7 +52,16 @@ static const int *const IntSigsEnd = // KillSigs - Signals that are synchronous with the program that will cause it // to die. static const int KillSigs[] = { - SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ + SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV +#ifdef SIGSYS + , SIGSYS +#endif +#ifdef SIGXCPU + , SIGXCPU +#endif +#ifdef SIGXFSZ + , SIGXFSZ +#endif #ifdef SIGEMT , SIGEMT #endif diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6fe7c2c..8e537d8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -643,6 +643,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. + if (DestRC == ARM::tGPRRegisterClass) + DestRC = ARM::GPRRegisterClass; + if (SrcRC == ARM::tGPRRegisterClass) + SrcRC = ARM::GPRRegisterClass; + if (DestRC != SrcRC) { if (DestRC->getSize() != SrcRC->getSize()) return false; @@ -697,6 +704,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. 
+ if (RC == ARM::tGPRRegisterClass) + RC = ARM::GPRRegisterClass; + if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) @@ -745,6 +757,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MFI.getObjectSize(FI), Align); + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. + if (RC == ARM::tGPRRegisterClass) + RC = ARM::GPRRegisterClass; + if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); @@ -1020,9 +1037,8 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { return MI; } -bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, - const MachineInstr *MI1, - const MachineRegisterInfo *MRI) const { +bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const { int Opcode = MI0->getOpcode(); if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic || @@ -1051,7 +1067,7 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, return ACPV0->hasSameValue(ACPV1); } - return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI); + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } /// getInstrPredicate - If instruction is predicated, returns its predicate diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 0d9d4a7..0194231 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -289,8 +289,8 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; - virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, - const MachineRegisterInfo *MRI) const; + virtual bool produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1) const; }; static inline @@ -332,7 +332,7 @@ bool isJumpTableBranchOpcode(int Opc) { static inline bool isIndirectBranchOpcode(int Opc) { - return Opc == ARM::BRIND || Opc == ARM::tBRIND; + return Opc == ARM::BRIND || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; } /// getInstrPredicate - If instruction is predicated, returns its predicate diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index cb0bd1d..577c363 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -513,7 +513,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { } /// estimateStackSize - Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { +static unsigned estimateStackSize(MachineFunction &MF) { const MachineFrameInfo *FFI = MF.getFrameInfo(); int Offset = 0; for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { @@ -583,14 +583,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - if (RealignStack) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->calculateMaxStackAlignment(); - } - // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. // FIXME: It will be better just to find spare register here. 
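The next hunk hoists the scavenging check into a single BigStack flag computed up front: when the estimated frame size (plus 4 bytes if a frame pointer is used, since the FP points at the previous FP's stack slot) may reach the limit addressable by an immediate offset, the function keeps a callee-saved register or a dedicated spill slot available for the register scavenger. As a purely hypothetical illustration, with a frame pointer and an estimated 1020-byte frame against a 1024-byte offset limit, 1020 + 4 >= 1024 holds, so a scavenging register or slot is still reserved even though the frame alone would appear to fit.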
@@ -679,8 +671,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } + // If any of the stack slot references may be out of range of an immediate + // offset, make sure a register (or a spill slot) is available for the + // register scavenger. Note that if we're indexing off the frame pointer, the + // effective stack size is 4 bytes larger since the FP points to the stack + // slot of the previous FP. + bool BigStack = RS && + estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF); + bool ExtraCSSpill = false; - if (!CanEliminateFrame || cannotEliminateFrame(MF)) { + if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. @@ -735,51 +735,43 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-saved register or reserve a special spill slot to facilitate // register scavenging. Thumb1 needs a spill slot for stack pointer // adjustments also, even when the frame itself is small. - if (RS && !ExtraCSSpill) { - MachineFrameInfo *MFI = MF.getFrameInfo(); - // If any of the stack slot references may be out of range of an - // immediate offset, make sure a register (or a spill slot) is - // available for the register scavenger. Note that if we're indexing - // off the frame pointer, the effective stack size is 4 bytes larger - // since the FP points to the stack slot of the previous FP. - if (estimateStackSize(MF, MFI) + (hasFP(MF) ? 4 : 0) - >= estimateRSStackSizeLimit(MF)) { - // If any non-reserved CS register isn't spilled, just spill one or two - // extra. That should take care of it! - unsigned NumExtras = TargetAlign / 4; - SmallVector<unsigned, 2> Extras; - while (NumExtras && !UnspilledCS1GPRs.empty()) { - unsigned Reg = UnspilledCS1GPRs.back(); - UnspilledCS1GPRs.pop_back(); + if (BigStack && !ExtraCSSpill) { + // If any non-reserved CS register isn't spilled, just spill one or two + // extra. That should take care of it! + unsigned NumExtras = TargetAlign / 4; + SmallVector<unsigned, 2> Extras; + while (NumExtras && !UnspilledCS1GPRs.empty()) { + unsigned Reg = UnspilledCS1GPRs.back(); + UnspilledCS1GPRs.pop_back(); + if (!isReservedReg(MF, Reg)) { + Extras.push_back(Reg); + NumExtras--; + } + } + // For non-Thumb1 functions, also check for hi-reg CS registers + if (!AFI->isThumb1OnlyFunction()) { + while (NumExtras && !UnspilledCS2GPRs.empty()) { + unsigned Reg = UnspilledCS2GPRs.back(); + UnspilledCS2GPRs.pop_back(); if (!isReservedReg(MF, Reg)) { Extras.push_back(Reg); NumExtras--; } } - // For non-Thumb1 functions, also check for hi-reg CS registers - if (!AFI->isThumb1OnlyFunction()) { - while (NumExtras && !UnspilledCS2GPRs.empty()) { - unsigned Reg = UnspilledCS2GPRs.back(); - UnspilledCS2GPRs.pop_back(); - if (!isReservedReg(MF, Reg)) { - Extras.push_back(Reg); - NumExtras--; - } - } - } - if (Extras.size() && NumExtras == 0) { - for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MF.getRegInfo().setPhysRegUsed(Extras[i]); - AFI->setCSRegisterIsSpilled(Extras[i]); - } - } else if (!AFI->isThumb1OnlyFunction()) { - // note: Thumb1 functions spill to R12, not the stack. - // Reserve a slot closest to SP or frame pointer. 
- const TargetRegisterClass *RC = ARM::GPRRegisterClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); + } + if (Extras.size() && NumExtras == 0) { + for (unsigned i = 0, e = Extras.size(); i != e; ++i) { + MF.getRegInfo().setPhysRegUsed(Extras[i]); + AFI->setCSRegisterIsSpilled(Extras[i]); } + } else if (!AFI->isThumb1OnlyFunction()) { + // note: Thumb1 functions spill to R12, not the stack. Reserve a slot + // closest to SP or frame pointer. + const TargetRegisterClass *RC = ARM::GPRRegisterClass; + MachineFrameInfo *MFI = MF.getFrameInfo(); + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); } } } @@ -1093,6 +1085,15 @@ hasReservedCallFrame(MachineFunction &MF) const { return !MF.getFrameInfo()->hasVarSizedObjects(); } +// canSimplifyCallFramePseudos - If there is a reserved call frame, the +// call frame pseudos can be simplified. Unlike most targets, having a FP +// is not sufficient here since we still may reference some objects via SP +// even when FP is available in Thumb2 mode. +bool ARMBaseRegisterInfo:: +canSimplifyCallFramePseudos(MachineFunction &MF) const { + return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); +} + static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, @@ -1127,13 +1128,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This eliminateCallFramePseudoInstr does not suppor Thumb1!"); + "This eliminateCallFramePseudoInstr does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); // Replace the pseudo instruction with a new instruction... unsigned Opc = Old->getOpcode(); - ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm(); - // FIXME: Thumb2 version of ADJCALLSTACKUP and ADJCALLSTACKDOWN? + int PIdx = Old->findFirstPredOperandIdx(); + ARMCC::CondCodes Pred = (PIdx == -1) + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm(); if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. 
unsigned PredReg = Old->getOperand(2).getReg(); @@ -1157,7 +1159,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); @@ -1168,12 +1169,12 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } int FrameIndex = MI.getOperand(i).getIndex(); - int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj; unsigned FrameReg; - Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); + int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); if (FrameReg != ARM::SP) SPAdj = 0; + Offset += SPAdj; // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; @@ -1264,7 +1265,7 @@ emitPrologue(MachineFunction &MF) const { MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This emitPrologue does not suppor Thumb1!"); + "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); unsigned NumBytes = MFI->getStackSize(); @@ -1349,7 +1350,9 @@ emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + if (STI.isTargetDarwin() || hasFP(MF)) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); @@ -1425,7 +1428,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && - "This emitEpilogue does not suppor Thumb1!"); + "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 33ba21d..64f6ff1 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -138,6 +138,7 @@ public: virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; virtual bool hasReservedCallFrame(MachineFunction &MF) const; + virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index bd703f4..21c6cb3 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -86,6 +86,7 @@ namespace { void emitWordLE(unsigned Binary); void emitDWordLE(uint64_t Binary); void emitConstPoolInstruction(const MachineInstr &MI); + void emitMOVi32immInstruction(const MachineInstr &MI); void emitMOVi2piecesInstruction(const MachineInstr &MI); void emitLEApcrelJTInstruction(const MachineInstr &MI); void emitPseudoMoveInstruction(const MachineInstr &MI); @@ -143,6 +144,15 @@ namespace { return getMachineOpValue(MI, 
MI.getOperand(OpIdx)); } + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the + /// machine operand requires relocation, record the relocation and return zero. + unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, + unsigned Reloc); + unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx, + unsigned Reloc) { + return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc); + } + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. /// unsigned getShiftOp(unsigned Imm) const ; @@ -214,6 +224,54 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { return 0; } +/// getMovi32Value - Return binary encoding of operand for movw/movt. If the +/// machine operand requires relocation, record the relocation and return zero. +unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, + const MachineOperand &MO, + unsigned Reloc) { + assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw)) + && "Relocation to this function should be for movt or movw"); + switch(MO.getType()) { + case MachineOperand::MO_Register: + return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + return static_cast<unsigned>(MO.getImm()); + break; + + case MachineOperand::MO_FPImmediate: + return static_cast<unsigned>( + MO.getFPImm()->getValueAPF().bitcastToAPInt().getLimitedValue()); + break; + + case MachineOperand::MO_MachineBasicBlock: + emitMachineBasicBlock(MO.getMBB(), Reloc); + break; + + case MachineOperand::MO_ConstantPoolIndex: + emitConstPoolAddress(MO.getIndex(), Reloc); + break; + + case MachineOperand::MO_JumpTableIndex: + emitJumpTableAddress(MO.getIndex(), Reloc); + break; + + case MachineOperand::MO_ExternalSymbol: + emitExternalSymbolAddress(MO.getSymbolName(), Reloc); + break; + + case MachineOperand::MO_GlobalAddress: + emitGlobalAddress(MO.getGlobal(), Reloc, true, false); + break; + + default: + llvm_unreachable("Unsupported immediate operand type for movw/movt"); + break; + } + return 0; +} + /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, @@ -433,6 +491,41 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { } } +void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + + unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; + + // Emit the 'mov' instruction. + unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode imm. + Binary |= Lo16 & 0xFFF; + Binary |= ((Lo16 >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + emitWordLE(Binary); + + unsigned Hi16 = (getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16) & 0xFFFF; + // Emit the 'mov' instruction. + Binary = 0x34 << 20; // movt: Insts[27-20] = 0b00110100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. 
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + Binary |= Hi16 & 0xFFF; + Binary |= ((Hi16 >> 12) & 0xF) << 16; + emitWordLE(Binary); +} + void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { const MachineOperand &MO0 = MI.getOperand(0); const MachineOperand &MO1 = MI.getOperand(1); @@ -552,7 +645,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { switch (Opcode) { default: llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); - // FIXME: Add support for MOVimm32. case TargetOpcode::INLINEASM: { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. @@ -599,6 +691,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { emitMiscLoadStoreInstruction(MI, ARM::PC); break; } + + case ARM::MOVi32imm: + emitMOVi32immInstruction(MI); + break; + case ARM::MOVi2pieces: // Two instructions to materialize a constant. emitMOVi2piecesInstruction(MI); @@ -1138,7 +1235,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { // Set the conditional execution predicate Binary |= II->getPredicate(&MI) << ARMII::CondShift; - if (TID.Opcode == ARM::BX_RET) + if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR) // The return register is LR. Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR); else diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a458269..013e00a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -65,7 +65,7 @@ public: } SDNode *Select(SDNode *N); - virtual void InstructionSelect(); + bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A, SDValue &B, SDValue &C); bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base, @@ -201,11 +201,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { } -void ARMDAGToDAGISel::InstructionSelect() { - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &BaseReg, diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 614e684..6a2c6bb 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -294,6 +294,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::SELECT_CC); } computeRegisterProperties(); @@ -544,6 +545,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::FMAX: return "ARMISD::FMAX"; + case ARMISD::FMIN: return "ARMISD::FMIN"; } } @@ -863,7 +866,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, @@ -920,7 +924,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); - SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32); + SDValue 
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -969,8 +973,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); } else { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, dl, DAG, VA, Flags)); @@ -983,8 +985,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); @@ -1031,7 +1031,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1052,7 +1053,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1238,7 +1240,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1261,7 +1264,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1278,8 +1282,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl, - DAG.GetOrdering(Chain.getNode())); + DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); return CallResult.first; } @@ -1308,21 +1311,24 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = 
DAG.getConstant(ARMPCLabelIndex, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } else { // local exec model ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } // The address of the thread local variable is the add of the thread @@ -1358,13 +1364,15 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } else { // If we have T2 ops, we can materialize the address directly via movt/movw @@ -1376,7 +1384,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); } } } @@ -1403,7 +1412,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1413,7 +1423,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, + false, false, 0); return Result; } @@ -1434,7 +1445,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -1467,7 +1479,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1515,7 +1528,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, 
PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue @@ -1592,7 +1606,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -1707,7 +1722,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -1745,7 +1761,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0); + PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1939,13 +1956,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, + false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - PseudoSourceValue::getJumpTable(), 0); + PseudoSourceValue::getJumpTable(), 0, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -1993,7 +2011,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { ? 
ARM::R7 : ARM::R11; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -2038,7 +2057,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -2047,9 +2066,9 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff, false, false, 0); DstOff += VTSize; } Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); @@ -2075,7 +2094,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcSV, SrcSVOff + SrcOff); + SrcSV, SrcSVOff + SrcOff, false, false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; @@ -2097,7 +2116,7 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, TFOps[i] = DAG.getStore(Chain, dl, Loads[i], DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, DAG.getConstant(DstOff, MVT::i32)), - DstSV, DstSVOff + DstOff); + DstSV, DstSVOff + DstOff, false, false, 0); ++i; DstOff += VTSize; BytesLeft -= VTSize; @@ -3835,23 +3854,106 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC +/// to match f32 max/min patterns to use NEON vmax/vmin instructions. +static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + // If the target supports NEON, try to use vmax/vmin instructions for f32 + // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set, + // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is + // a NaN; only do the transformation when it matches that behavior. + + // For now only do this when using NEON for FP operations; if using VFP, it + // is not obvious that the benefit outweighs the cost of switching to the + // NEON pipeline. + if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || + N->getValueType(0) != MVT::f32) + return SDValue(); + + SDValue CondLHS = N->getOperand(0); + SDValue CondRHS = N->getOperand(1); + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + + unsigned Opcode = 0; + bool IsReversed; + if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { + IsReversed = false; // x CC y ? x : y + } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { + IsReversed = true ; // x CC y ? 
y : x + } else { + return SDValue(); + } + + bool IsUnordered; + switch (CC) { + default: break; + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETULT: + case ISD::SETULE: + // If LHS is NaN, an ordered comparison will be false and the result will + // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS + // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. + IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); + if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) + break; + // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin + // will return -0, so vmin can only be used for unsafe math or if one of + // the operands is known to be nonzero. + if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && + !UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) + break; + Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; + break; + + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: + // If LHS is NaN, an ordered comparison will be false and the result will + // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS + // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. + IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); + if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) + break; + // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax + // will return +0, so vmax can only be used for unsafe math or if one of + // the operands is known to be nonzero. + if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && + !UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) + break; + Opcode = IsReversed ? 
ARMISD::FMIN : ARMISD::FMAX; + break; + } + + if (!Opcode) + return SDValue(); + return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); +} + SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::ADD: return PerformADDCombine(N, DCI); - case ISD::SUB: return PerformSUBCombine(N, DCI); + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); - case ISD::INTRINSIC_WO_CHAIN: - return PerformIntrinsicCombine(N, DCI.DAG); + case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: - case ISD::SRL: - return PerformShiftCombine(N, DCI.DAG, Subtarget); + case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - return PerformExtendCombine(N, DCI.DAG, Subtarget); + case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); + case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); } return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 3c5df45..f8f8adc 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -131,7 +131,11 @@ namespace llvm { VREV16, // reverse elements within 16-bit halfwords VZIP, // zip (interleave) VUZP, // unzip (deinterleave) - VTRN // transpose + VTRN, // transpose + + // Floating-point max and min: + FMAX, + FMIN }; } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 169eeed..76595fa 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -56,6 +56,9 @@ def NEONGetLnFrm : Format<25>; def NEONSetLnFrm : Format<26>; def NEONDupFrm : Format<27>; +def MiscFrm : Format<29>; +def ThumbMiscFrm : Format<30>; + // Misc flags. // the instruction has a Rn register operand. 
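To make the guards in the SELECT_CC combine above concrete, here is a minimal C sketch (helper names are illustrative, not from the patch) of why an f32 select such as "x < y ? x : y" cannot be mapped to NEON vmin unconditionally: the two disagree when an operand is NaN, and the or-equal forms disagree on signed zeros.

#include <math.h>

/* Select form: an ordered compare with x = NaN is false, so y is returned. */
static float select_lt(float x, float y) { return x < y ? x : y; }

/* NEON vmin.f32 behavior as described in the combine above: NaN if either
 * operand is NaN, and -0.0 is treated as smaller than +0.0. */
static float vmin_f32(float x, float y) {
  if (isnan(x) || isnan(y)) return NAN;
  if (x == 0.0f && y == 0.0f) return signbit(x) ? x : y;
  return x < y ? x : y;
}

/* select_lt(NAN, 1.0f) yields 1.0f while vmin_f32(NAN, 1.0f) yields NaN,
 * hence the isKnownNeverNaN checks. For "x <= y ? x : y", +0.0 <= -0.0 is
 * true so the select yields +0.0 while vmin yields -0.0, hence the extra
 * UnsafeFPMath / isKnownNeverZero guard on the or-equal comparisons. */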
@@ -705,6 +708,20 @@ class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24} = 1; // P bit let Inst{27-25} = 0b000; } +class AI3lddpr<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit + let Inst{27-25} = 0b000; +} + // Pre-indexed stores class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, @@ -720,6 +737,19 @@ class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{24} = 1; // P bit let Inst{27-25} = 0b000; } +class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit + let Inst{27-25} = 0b000; +} // Post-indexed loads class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin, @@ -731,7 +761,7 @@ class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 0; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -744,7 +774,7 @@ class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 1; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -757,7 +787,20 @@ class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 1; // S bit let Inst{7} = 1; let Inst{20} = 1; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit + let Inst{24} = 0; // P bit + let Inst{27-25} = 0b000; +} +class AI3lddpo<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -772,7 +815,20 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin, let Inst{6} = 0; // S bit let Inst{7} = 1; let Inst{20} = 0; // L bit - let Inst{21} = 1; // W bit + let Inst{21} = 0; // W bit + let Inst{24} = 0; // P bit + let Inst{27-25} = 0b000; +} +class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin, + opc, asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -1147,6 +1203,19 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, let Inst{8} = 1; // The W bit. } +// Helper class for disassembly only +// A6.3.16 & A6.3.17 +// T2Imac - Thumb2 multiply [accumulate, and absolute difference] instructions. 
+class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : T2I<oops, iops, itin, opc, asm, pattern> { + let Inst{31-27} = 0b11111; + let Inst{26-24} = 0b011; + let Inst{23} = long; + let Inst{22-20} = op22_20; + let Inst{7-4} = op7_4; +} + // Tv5Pat - Same as Pat<>, but requires V5T Thumb mode. class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> { list<Predicate> Predicates = [IsThumb1Only, HasV5T]; @@ -1324,6 +1393,15 @@ class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, let Inst{4} = 0; } +// VFP conversion between floating-point and fixed-point +class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> { + // size (fixed-point number): sx == 0 ? 16 : 32 + let Inst{7} = op5; // sx +} + // VFP conversion instructions, if no NEON class AVConv1In<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 852c74e..3812aba 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -113,6 +113,8 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // +def HasV4T : Predicate<"Subtarget->hasV4TOps()">; +def NoV4T : Predicate<"!Subtarget->hasV4TOps()">; def HasV5T : Predicate<"Subtarget->hasV5TOps()">; def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; def HasV6 : Predicate<"Subtarget->hasV6Ops()">; @@ -130,8 +132,6 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">; def IsARM : Predicate<"!Subtarget->isThumb()">; def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; -def CarryDefIsUnused : Predicate<"!N->hasAnyUseOfValue(1)">; -def CarryDefIsUsed : Predicate<"N->hasAnyUseOfValue(1)">; // FIXME: Eventually this will be just "hasV6T2Ops". def UseMovt : Predicate<"Subtarget->useMovt()">; @@ -176,7 +176,7 @@ def imm16_31 : PatLeaf<(i32 imm), [{ return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32; }]>; -def so_imm_neg : +def so_imm_neg : PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1; }], so_imm_neg_XFORM>; @@ -194,7 +194,7 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{ /// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield /// e.g., 0xf000ffff def bf_inv_mask_imm : Operand<i32>, - PatLeaf<(imm), [{ + PatLeaf<(imm), [{ uint32_t v = (uint32_t)N->getZExtValue(); if (v == 0xffffffff) return 0; @@ -227,7 +227,7 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; }], hi16>; -/// imm0_65535 predicate - True if the 32-bit immediate is in the range +/// imm0_65535 predicate - True if the 32-bit immediate is in the range /// [0.65535]. def imm0_65535 : PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 65536; @@ -236,6 +236,21 @@ def imm0_65535 : PatLeaf<(i32 imm), [{ class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; +/// adde and sube predicates - True based on whether the carry flag output +/// will be needed or not. 
+def adde_dead_carry : + PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), + [{return !N->hasAnyUseOfValue(1);}]>; +def sube_dead_carry : + PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), + [{return !N->hasAnyUseOfValue(1);}]>; +def adde_live_carry : + PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS), + [{return N->hasAnyUseOfValue(1);}]>; +def sube_live_carry : + PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), + [{return N->hasAnyUseOfValue(1);}]>; + //===----------------------------------------------------------------------===// // Operand Definitions. // @@ -501,6 +516,22 @@ multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> { } } +multiclass AI_unary_rrot_np<bits<8> opcod, string opc> { + def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src), + IIC_iUNAr, opc, "\t$dst, $src", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{11-10} = 0b00; + let Inst{19-16} = 0b1111; + } + def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot), + IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{19-16} = 0b1111; + } +} + /// AI_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { @@ -510,13 +541,29 @@ multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { Requires<[IsARM, HasV6]> { let Inst{11-10} = 0b00; } - def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, + i32imm:$rot), IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)))]>, Requires<[IsARM, HasV6]>; } +// For disassembly only. +multiclass AI_bin_rrot_np<bits<8> opcod, string opc> { + def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), + IIC_iALUr, opc, "\t$dst, $LHS, $RHS", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{11-10} = 0b00; + } + def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, + i32imm:$rot), + IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]>; +} + /// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
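As a rough illustration (not from the patch; names are made up) of the property the hasAnyUseOfValue(1) predicates above test, consider a 64-bit add done in 32-bit halves: the low word's carry-out has a user, so it is "live", while the high word's carry-out is never read, so it is "dead". That is the distinction the ADC/SBC versus ADCS/SBCS definitions later in this file key on.

#include <stdint.h>

static void add64(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi,
                  uint32_t *rlo, uint32_t *rhi) {
  uint32_t lo = alo + blo;
  uint32_t carry = lo < alo;   /* low word's carry-out: consumed below ("live") */
  *rlo = lo;
  *rhi = ahi + bhi + carry;    /* high word's carry-out is never read ("dead") */
}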
let Uses = [CPSR] in { multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, @@ -524,13 +571,13 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let Inst{25} = 1; } def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let isCommutable = Commutable; let Inst{11-4} = 0b00000000; let Inst{25} = 0; @@ -538,7 +585,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, - Requires<[IsARM, CarryDefIsUnused]> { + Requires<[IsARM]> { let Inst{25} = 0; } } @@ -549,16 +596,14 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 1; } def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; @@ -566,8 +611,7 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, - Requires<[IsARM, CarryDefIsUsed]> { - let Defs = [CPSR]; + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 0; } @@ -593,18 +637,153 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), NoItinerary, "${instid:label} ${cpidx:cpentry}", []>; -let Defs = [SP], Uses = [SP] in { +// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE +// from removing one half of the matched pairs. That breaks PEI, which assumes +// these will always be in pairs, and asserts if it finds otherwise. Better way? 
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, "@ ADJCALLSTACKUP $amt1", [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; -def ADJCALLSTACKDOWN : +def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, "@ ADJCALLSTACKDOWN $amt", [(ARMcallseq_start timm:$amt)]>; } +def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000000; +} + +def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000001; +} + +def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000010; +} + +def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000011; +} + +def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel", + "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01101000; + let Inst{7-4} = 0b1011; +} + +def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-0} = 0b00000100; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-20} = 0b00010010; + let Inst{7-4} = 0b0111; +} + +// Change Processor State is a system instruction -- for disassembly only. +// The singleton $opt operand contains the following information: +// opt{4-0} = mode from Inst{4-0} +// opt{5} = changemode from Inst{17} +// opt{8-6} = AIF from Inst{8-6} +// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable +def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 0; + let Inst{5} = 0; +} + +// Preload signals the memory system of possible future data/instruction access. +// These are for disassembly only. 
+multiclass APreLoad<bit data, bit read, string opc> { + + def i : AXI<(outs), (ins GPR:$base, i32imm:$imm), MiscFrm, NoItinerary, + !strconcat(opc, "\t[$base, $imm]"), []> { + let Inst{31-26} = 0b111101; + let Inst{25} = 0; // 0 for immediate form + let Inst{24} = data; + let Inst{22} = read; + let Inst{21-20} = 0b01; + } + + def r : AXI<(outs), (ins addrmode2:$addr), MiscFrm, NoItinerary, + !strconcat(opc, "\t$addr"), []> { + let Inst{31-26} = 0b111101; + let Inst{25} = 1; // 1 for register form + let Inst{24} = data; + let Inst{22} = read; + let Inst{21-20} = 0b01; + let Inst{4} = 0; + } +} + +defm PLD : APreLoad<1, 1, "pld">; +defm PLDW : APreLoad<1, 0, "pldw">; +defm PLI : APreLoad<0, 1, "pli">; + +def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 1; + let Inst{7-4} = 0b0000; +} + +def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{16} = 1; + let Inst{9} = 0; + let Inst{7-4} = 0b0000; +} + +def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{27-16} = 0b001100100000; + let Inst{7-4} = 0b1111; +} + +// A5.4 Permanently UNDEFINED instructions. +def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{7-5} = 0b111; + let Inst{4} = 0b1; +} + // Address computation and loads and stores in PIC mode. let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), @@ -665,7 +844,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), "(${label}_${id}-(", "${:private}PCRELL${:uid}+8))\n"), !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), + "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), []> { let Inst{25} = 1; } @@ -674,24 +853,50 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), // Control Flow Instructions. 
// -let isReturn = 1, isTerminator = 1, isBarrier = 1 in - def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, - "bx", "\tlr", [(ARMretflag)]> { - let Inst{3-0} = 0b1110; - let Inst{7-4} = 0b0001; - let Inst{19-8} = 0b111111111111; - let Inst{27-20} = 0b00010010; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + // ARMV4T and above + def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "bx", "\tlr", [(ARMretflag)]>, + Requires<[IsARM, HasV4T]> { + let Inst{3-0} = 0b1110; + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + } + + // ARMV4 only + def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "mov", "\tpc, lr", [(ARMretflag)]>, + Requires<[IsARM, NoV4T]> { + let Inst{11-0} = 0b000000001110; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + // ARMV4T and above def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", - [(brind GPR:$dst)]> { + [(brind GPR:$dst)]>, + Requires<[IsARM, HasV4T]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; let Inst{31-28} = 0b1110; } + + // ARMV4 only + def MOVPCRX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "mov\tpc, $dst", + [(brind GPR:$dst)]>, + Requires<[IsARM, NoV4T]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + let Inst{31-28} = 0b1110; + } } // FIXME: remove when we have a way to marking a MI with these properties. @@ -732,14 +937,26 @@ let isCall = 1, } // ARMv4T - def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BX : ABXIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", - [(ARMcall_nolink GPR:$func)]>, - Requires<[IsARM, IsNotDarwin]> { + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T, IsNotDarwin]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } + + // ARMv4 + def BMOVPCRX : ABXIx2<(outs), (ins tGPR:$func, variable_ops), + IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func", + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T, IsNotDarwin]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } // On Darwin R9 is call-clobbered. @@ -769,13 +986,26 @@ let isCall = 1, } // ARMv4T - def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops), + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", - [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> { + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T, IsDarwin]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } + + // ARMv4 + def BMOVPCRXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops), + IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func", + [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T, IsDarwin]> { + let Inst{11-4} = 0b00000000; + let Inst{15-12} = 0b1111; + let Inst{19-16} = 0b0000; + let Inst{27-20} = 0b00011010; + } } let isBranch = 1, isTerminator = 1 in { @@ -821,25 +1051,75 @@ let isBranch = 1, isTerminator = 1 in { } // isBarrier = 1 // FIXME: should be able to write a pattern for ARMBrcond, but can't use - // a two-value operand where a dag node expects two operands. 
:( + // a two-value operand where a dag node expects two operands. :( def Bcc : ABI<0b1010, (outs), (ins brtarget:$target), IIC_Br, "b", "\t$target", [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; } +// Branch and Exchange Jazelle -- for disassembly only +def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + //let Inst{19-8} = 0xfff; + let Inst{7-4} = 0b0010; +} + +// Secure Monitor Call is a system instruction -- for disassembly only +def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0111; +} + +// Supervisor Call (Software Interrupt) -- for disassembly only +let isCall = 1 in { +def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", + [/* For disassembly only; pattern left blank */]>; +} + +// Store Return State is a system instruction -- for disassembly only +def SRSW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode), + NoItinerary, "srs${addr:submode}\tsp!, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b110; // W = 1 +} + +def SRS : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode), + NoItinerary, "srs${addr:submode}\tsp, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b100; // W = 0 +} + +// Return From Exception is a system instruction -- for disassembly only +def RFEW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base), + NoItinerary, "rfe${addr:submode}\t$base!", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b011; // W = 1 +} + +def RFE : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base), + NoItinerary, "rfe${addr:submode}\t$base", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b001; // W = 0 +} + //===----------------------------------------------------------------------===// // Load / store Instructions. // // Load -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; @@ -848,7 +1128,7 @@ def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, "ldrh", "\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; -def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, +def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldrb", "\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; @@ -907,6 +1187,51 @@ def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; + +// For disassembly only +def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr, + "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>, + Requires<[IsARM, HasV5TE]>; + +// For disassembly only +def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr, + "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>, + Requires<[IsARM, HasV5TE]>; + +} + +// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. + +def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, + "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} + +def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, + "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite } // Store @@ -915,8 +1240,8 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, [(store GPR:$src, addrmode2:$addr)]>; // Stores with truncate -def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "strh", "\t$src, $addr", +def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + IIC_iStorer, "strh", "\t$src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, @@ -931,47 +1256,87 @@ def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base, am2offset:$offset), + (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm, IIC_iStoreru, "str", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STR_POST : AI2stwpo<(outs GPR:$base_wb), - (ins GPR:$src, 
GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am2offset:$offset), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; +// For disassembly only +def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), + (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strd", "\t$src1, $src2, [$base, $offset]!", + "$base = $base_wb", []>; + +// For disassembly only +def STRD_POST: AI3stdpo<(outs GPR:$base_wb), + (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strd", "\t$src1, $src2, [$base], $offset", + "$base = $base_wb", []>; + +// STRT, STRBT, and STRHT are for disassembly only. + +def STRT : AI2stwpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + +def STRBT : AI2stbpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), + StFrm, IIC_iStoreru, + "strbt", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + +def STRHT: AI3sthpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), + StMiscFrm, IIC_iStoreru, + "strht", "\t$src, [$base], $offset", "$base = $base_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite +} + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
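The writeback store forms above (matching the pre_store / post_store / pre_truncsti* / post_truncsti* patterns) differ only in when the base register is updated; a small C analogy of pre-indexed "str Rt, [Rn, #off]!" versus post-indexed "str Rt, [Rn], #off" (illustrative only, not from the patch; assumes a suitably aligned base):

#include <stdint.h>

/* Pre-indexed: the access uses base+off, and that address is written back. */
static void str_pre(uint32_t val, uint8_t **base, int off) {
  *base += off;
  *(uint32_t *)*base = val;
}

/* Post-indexed: the access uses the old base, then the base is advanced. */
static void str_post(uint32_t val, uint8_t **base, int off) {
  *(uint32_t *)*base = val;
  *base += off;
}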
// @@ -999,7 +1364,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, let Inst{25} = 0; } -def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), +def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { let Inst{25} = 0; @@ -1012,7 +1377,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi, } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), +def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), DPFrm, IIC_iMOVi, "movw", "\t$dst, $src", [(set GPR:$dst, imm0_65535:$src)]>, @@ -1026,7 +1391,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), DPFrm, IIC_iMOVi, "movt", "\t$dst, $imm", [(set GPR:$dst, - (or (and GPR:$src, 0xffff), + (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, Requires<[IsARM, HasV6T2]> { let Inst{20} = 0; @@ -1045,7 +1410,7 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, // due to flag operands. let Defs = [CPSR] in { -def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, +def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, @@ -1069,7 +1434,11 @@ defm SXTAB : AI_bin_rrot<0b01101010, defm SXTAH : AI_bin_rrot<0b01101011, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; -// TODO: SXT(A){B|H}16 +// For disassembly only +defm SXTB16 : AI_unary_rrot_np<0b01101000, "sxtb16">; + +// For disassembly only +defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">; // Zero extenders @@ -1093,9 +1462,9 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah", } // This isn't safe in general, the add is two 16-bit units, not a 32-bit add. -//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>; +// For disassembly only +defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">; -// TODO: UXT(A){B|H}16 def SBFX : I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), @@ -1131,13 +1500,13 @@ defm SUBS : AI1_bin_s_irs<0b0010, "subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; // These don't define reg/reg forms, because they are handled above. 
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, @@ -1171,14 +1540,14 @@ def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, let Uses = [CPSR] in { def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b", - [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{25} = 1; } def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", - [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{25} = 0; } } @@ -1187,15 +1556,15 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), let Defs = [CPSR], Uses = [CPSR] in { def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b", - [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 1; } def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b", - [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]> { + [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>, + Requires<[IsARM]> { let Inst{20} = 1; let Inst{25} = 0; } @@ -1216,6 +1585,126 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm), // (mul X, 2^n+1) -> (add (X << n), X) // (mul X, 2^n-1) -> (rsb X, (X << n)) +// ARM Arithmetic Instruction -- for disassembly only +// GPR:$dst = GPR:$a op GPR:$b +class AAI<bits<8> op27_20, bits<4> op7_4, string opc> + : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, + opc, "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = op27_20; + let Inst{7-4} = op7_4; +} + +// Saturating add/subtract -- for disassembly only + +def QADD : AAI<0b00010000, 0b0101, "qadd">; +def QADD16 : AAI<0b01100010, 0b0001, "qadd16">; +def QADD8 : AAI<0b01100010, 0b1001, "qadd8">; +def QASX : AAI<0b01100010, 0b0011, "qasx">; +def QDADD : AAI<0b00010100, 0b0101, "qdadd">; +def QDSUB : AAI<0b00010110, 0b0101, "qdsub">; +def QSAX : AAI<0b01100010, 0b0101, "qsax">; +def QSUB : AAI<0b00010010, 0b0101, "qsub">; +def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">; +def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">; +def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">; +def UQADD8 : AAI<0b01100110, 0b1001, "uqadd8">; +def UQASX : AAI<0b01100110, 0b0011, "uqasx">; +def UQSAX : AAI<0b01100110, 0b0101, "uqsax">; +def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">; +def UQSUB8 : AAI<0b01100110, 0b1111, "uqsub8">; + +// Signed/Unsigned add/subtract -- for disassembly only + +def SASX : AAI<0b01100001, 0b0011, "sasx">; +def SADD16 : AAI<0b01100001, 0b0001, "sadd16">; +def SADD8 : AAI<0b01100001, 0b1001, "sadd8">; +def SSAX : AAI<0b01100001, 0b0101, "ssax">; +def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">; +def SSUB8 : AAI<0b01100001, 0b1111, "ssub8">; +def UASX : AAI<0b01100101, 0b0011, "uasx">; +def UADD16 : AAI<0b01100101, 0b0001, "uadd16">; +def UADD8 : AAI<0b01100101, 0b1001, "uadd8">; +def USAX : AAI<0b01100101, 0b0101, "usax">; +def USUB16 : AAI<0b01100101, 0b0111, "usub16">; +def USUB8 : AAI<0b01100101, 0b1111, "usub8">; + +// Signed/Unsigned halving add/subtract -- for 
disassembly only + +def SHASX : AAI<0b01100011, 0b0011, "shasx">; +def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">; +def SHADD8 : AAI<0b01100011, 0b1001, "shadd8">; +def SHSAX : AAI<0b01100011, 0b0101, "shsax">; +def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">; +def SHSUB8 : AAI<0b01100011, 0b1111, "shsub8">; +def UHASX : AAI<0b01100111, 0b0011, "uhasx">; +def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">; +def UHADD8 : AAI<0b01100111, 0b1001, "uhadd8">; +def UHSAX : AAI<0b01100111, 0b0101, "uhsax">; +def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">; +def UHSUB8 : AAI<0b01100111, 0b1111, "uhsub8">; + +// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only + +def USAD8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), + MulFrm /* for convenience */, NoItinerary, "usad8", + "\t$dst, $a, $b", []>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01111000; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b0001; +} +def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + MulFrm /* for convenience */, NoItinerary, "usada8", + "\t$dst, $a, $b, $acc", []>, + Requires<[IsARM, HasV6]> { + let Inst{27-20} = 0b01111000; + let Inst{7-4} = 0b0001; +} + +// Signed/Unsigned saturate -- for disassembly only + +def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110101; + let Inst{6-4} = 0b001; +} + +def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110101; + let Inst{6-4} = 0b101; +} + +def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, + NoItinerary, "ssat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b01101010; + let Inst{7-4} = 0b0011; +} + +def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110111; + let Inst{6-4} = 0b001; +} + +def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt), + DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-21} = 0b0110111; + let Inst{6-4} = 0b101; +} + +def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm, + NoItinerary, "usat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-20} = 0b01101110; + let Inst{7-4} = 0b0011; +} //===----------------------------------------------------------------------===// // Bitwise Instructions. @@ -1239,6 +1728,17 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), let Inst{6-0} = 0b0011111; } +// A8.6.18 BFI - Bitfield insert (Encoding A1) +// Added for disassembler with the pattern field purposely left blank. 
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfi", "\t$dst, $src, $imm", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-21} = 0b0111110; + let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 +} + def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { @@ -1251,7 +1751,7 @@ def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, let Inst{25} = 0; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, +def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mvn", "\t$dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP { let Inst{25} = 1; @@ -1314,6 +1814,14 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), let Inst{15-12} = 0b1111; } +def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMUL32, "smmulr", "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; // R = 1 + let Inst{15-12} = 0b1111; +} + def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, @@ -1321,6 +1829,12 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let Inst{7-4} = 0b0001; } +def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; // R = 1 +} def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", @@ -1329,6 +1843,13 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), let Inst{7-4} = 0b1101; } +def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1111; // R = 1 +} + multiclass AI_smul<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", @@ -1400,7 +1921,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]>, + (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 0; let Inst{6} = 1; @@ -1446,8 +1967,87 @@ multiclass AI_smla<string opc, PatFrag opnode> { defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// TODO: Halfword multiple accumulate long: SMLAL<x><y> -// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD +// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only +def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; +} + +def 
SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; +} + +def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; +} + +def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; +} + +// Helper class for AI_smld -- for disassembly only +class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops, + InstrItinClass itin, string opc, string asm> + : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> { + let Inst{4} = 1; + let Inst{5} = swap; + let Inst{6} = sub; + let Inst{7} = 0; + let Inst{21-20} = 0b00; + let Inst{22} = long; + let Inst{27-23} = 0b01110; +} + +multiclass AI_smld<bit sub, string opc> { + + def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">; + + def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">; + + def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b), + NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">; + + def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b), + NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">; + +} + +defm SMLA : AI_smld<0, "smla">; +defm SMLS : AI_smld<1, "smls">; + +multiclass AI_sdml<bit sub, string opc> { + + def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> { + let Inst{15-12} = 0b1111; + } + + def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> { + let Inst{15-12} = 0b1111; + } + +} + +defm SMUA : AI_sdml<0, "smua">; +defm SMUS : AI_sdml<1, "smus">; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. @@ -1505,7 +2105,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]>, @@ -1522,7 +2122,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]>, Requires<[IsARM, HasV6]> { @@ -1568,7 +2168,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. 
:( +// a two-value operand where a dag node expects two operands. :( def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -1606,6 +2206,7 @@ def Int_MemBarrierV7 : AInoP<(outs), (ins), Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff05; // FIXME: add support for options other than a full system DMB + // See DMB disassembly-only variants below. let Inst{3-0} = 0b1111; } @@ -1616,6 +2217,7 @@ def Int_SyncBarrierV7 : AInoP<(outs), (ins), Requires<[IsARM, HasV7]> { let Inst{31-4} = 0xf57ff04; // FIXME: add support for options other than a full system DSB + // See DSB disassembly-only variants below. let Inst{3-0} = 0b1111; } @@ -1638,6 +2240,64 @@ def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero), } } +// Helper class for multiclass MemB -- for disassembly only +class AMBI<string opc, string asm> + : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm, + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf57; +} + +multiclass MemB<bits<4> op7_4, string opc> { + + def st : AMBI<opc, "\tst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1110; + } + + def ish : AMBI<opc, "\tish"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1011; + } + + def ishst : AMBI<opc, "\tishst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1010; + } + + def nsh : AMBI<opc, "\tnsh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0111; + } + + def nshst : AMBI<opc, "\tnshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0110; + } + + def osh : AMBI<opc, "\tosh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0011; + } + + def oshst : AMBI<opc, "\toshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0010; + } +} + +// These DMB variants are for disassembly only. +defm DMB : MemB<0b0101, "dmb">; + +// These DSB variants are for disassembly only. +defm DSB : MemB<0b0100, "dsb">; + +// ISB has only full system option -- for disassembly only +def ISBsy : AMBI<"isb", ""> { + let Inst{7-4} = 0b0110; + let Inst{3-0} = 0b1111; +} + let usesCustomInserter = 1 in { let Uses = [CPSR] in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< @@ -1777,6 +2437,35 @@ def STREXD : AIstrex<0b01, (outs GPR:$success), []>; } +// Clear-Exclusive is for disassembly only. +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf57; + let Inst{7-4} = 0b0001; +} + +// SWP/SWPB are deprecated in V6/V7 and for disassembly only. +let mayLoad = 1 in { +def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swp", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 0; // B = 0 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} + +def SWPB : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary, + "swpb", "\t$dst, $src, [$ptr]", + [/* For disassembly only; pattern left blank */]> { + let Inst{27-23} = 0b00010; + let Inst{22} = 1; // B = 1 + let Inst{21-20} = 0b00; + let Inst{7-4} = 0b1001; +} +} + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1827,7 +2516,7 @@ let Defs = // Two piece so_imms. 
let isReMaterializable = 1 in -def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), +def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, IIC_iMOVi, "mov", "\t$dst, $src", [(set GPR:$dst, so_imm2part:$src)]>, @@ -1852,7 +2541,7 @@ def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS), // FIXME: Remove this when we can do generalized remat. let isReMaterializable = 1 in def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, - "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", + "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>, Requires<[IsARM, HasV6T2]>; @@ -1959,3 +2648,226 @@ include "ARMInstrVFP.td" // include "ARMInstrNEON.td" + +//===----------------------------------------------------------------------===// +// Coprocessor Instructions. For disassembly only. +// + +def CDP : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp", "\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{4} = 0; +} + +def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + nohash_imm:$CRd, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp2\tp$cop, $opc1, cr$CRd, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{4} = 0; +} + +class ACI<dag oops, dag iops, string opc, string asm> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary, + opc, asm, "", [/* For disassembly only; pattern left blank */]> { + let Inst{27-25} = 0b110; +} + +multiclass LdStCop<bits<4> op31_28, bit load, string opc> { + + def _OFFSET : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _PRE : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "\tp$cop, cr$CRd, $addr!"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _POST : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), + opc, "\tp$cop, cr$CRd, [$base], $offset"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _OPTION : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option), + opc, "\tp$cop, cr$CRd, [$base], $option"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def L_OFFSET : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "l\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_PRE : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "l\tp$cop, cr$CRd, $addr!"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_POST : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), + 
opc, "l\tp$cop, cr$CRd, [$base], $offset"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_OPTION : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), + opc, "l\tp$cop, cr$CRd, [$base], $option"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } +} + +defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">; +defm LDC2 : LdStCop<0b1111, 1, "ldc2">; +defm STC : LdStCop<{?,?,?,?}, 0, "stc">; +defm STC2 : LdStCop<0b1111, 0, "stc2">; + +def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MCR2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mcr2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 0; + let Inst{4} = 1; +} + +def MRC : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MRC2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1, + GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2), + NoItinerary, "mrc2\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = 1; + let Inst{4} = 1; +} + +def MCRR : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; +} + +def MCRR2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mcrr2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0100; +} + +def MRRC : ABI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc", "\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0101; +} + +def MRRC2 : ABXI<0b1100, (outs), (ins nohash_imm:$cop, i32imm:$opc, + GPR:$Rt, GPR:$Rt2, nohash_imm:$CRm), + NoItinerary, "mrrc2\tp$cop, $opc, $Rt, $Rt2, cr$CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-20} = 0b0101; +} + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +def MRS : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary, "mrs", "\t$dst, cpsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0000; + let Inst{7-4} = 0b0000; +} + +def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0100; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is 
ignored for the time being. +def MSR : ABI<0b0001,(outs),(ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRi : ABI<0b0011,(outs),(ins so_imm:$a), NoItinerary, "msr", "\tcpsr, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRsys : ABI<0b0001,(outs),(ins GPR:$src),NoItinerary,"msr","\tspsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0000; +} + +// FIXME: mask is ignored for the time being. +def MSRsysi : ABI<0b0011,(outs),(ins so_imm:$a),NoItinerary,"msr","\tspsr, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0110; + let Inst{7-4} = 0b0000; +} diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e2be7ba..3aa0810 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -83,11 +83,17 @@ def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>; def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>; def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; +def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; +def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; + //===----------------------------------------------------------------------===// // NEON operand definitions //===----------------------------------------------------------------------===// @@ -123,9 +129,7 @@ def h64imm : Operand<i64> { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), - IIC_fpLoadm, - "vldm", "${addr:submode} ${addr:base}, $dst1", - []> { + IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; let Inst{11-9} = 0b101; @@ -133,9 +137,7 @@ def VLDMD : NI<(outs), def VLDMS : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), - IIC_fpLoadm, - "vldm", "${addr:submode} ${addr:base}, $dst1", - []> { + IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; let Inst{11-9} = 0b101; @@ -144,10 +146,9 @@ def VLDMS : NI<(outs), */ // Use vldmia to load a Q register as a D register pair. -def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), - IIC_fpLoadm, - "vldmia", "$addr, ${dst:dregpair}", - [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { +def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, + "vldmia", "$addr, ${dst:dregpair}", + [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit @@ -156,10 +157,9 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), } // Use vstmia to store a Q register as a D register pair. 
-def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), - IIC_fpStorem, - "vstmia", "$addr, ${src:dregpair}", - [(store (v2f64 QPR:$src), addrmode4:$addr)]> { +def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, + "vstmia", "$addr, ${src:dregpair}", + [(store (v2f64 QPR:$src), addrmode4:$addr)]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit let Inst{23} = 1; // U bit @@ -191,6 +191,29 @@ def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; +// These (dreg triple/quadruple) are for disassembly only. +class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; +class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; + +def VLD1d8T : VLD1D3<0b0000, "vld1", "8">; +def VLD1d16T : VLD1D3<0b0100, "vld1", "16">; +def VLD1d32T : VLD1D3<0b1000, "vld1", "32">; +//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">; + +def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">; +def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">; +def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">; +//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">; + + let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // VLD2 : Vector Load (multiple 2-element structures) @@ -216,6 +239,16 @@ def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; +// These (double-spaced dreg pair) are for disassembly only. +class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr), IIC_VLD2, + OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; + +def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">; +def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">; +def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">; + // VLD3 : Vector Load (multiple 3-element structures) class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), @@ -285,105 +318,64 @@ def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", + IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2", []>; // vld2 to single-spaced registers. def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 0; -} -def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 0; -} +def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; } +def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; } // vld2 to double-spaced even registers. 
-def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 1; -} -def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 1; -} +def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } +def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } // vld2 to double-spaced odd registers. -def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { - let Inst{5} = 1; -} -def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { - let Inst{6} = 1; -} +def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } +def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } // VLD3LN : Vector Load (single 3-element structure to one lane) class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, - OpcodeStr, Dt, + nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; // vld3 to single-spaced registers. -def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { - let Inst{4} = 0; -} -def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b00; -} -def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b000; -} +def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; } +def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; } // vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b10; -} -def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b100; -} +def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } // vld3 to double-spaced odd registers. -def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { - let Inst{5-4} = 0b10; -} -def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { - let Inst{6-4} = 0b100; -} +def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, - OpcodeStr, Dt, + nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; // vld4 to single-spaced registers. def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 0; -} -def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 0; -} +def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; } +def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; } // vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 1; -} -def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 1; -} +def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } +def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } // vld4 to double-spaced odd registers. 
-def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { - let Inst{5} = 1; -} -def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { - let Inst{6} = 1; -} +def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } +def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -418,6 +410,31 @@ def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; } // hasExtraSrcRegAllocReq +// These (dreg triple/quadruple) are for disassembly only. +class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + OpcodeStr, Dt, + "\\{$src1, $src2, $src3\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; +class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, OpcodeStr, Dt, + "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + [/* For disassembly only; pattern left blank */]>; + +def VST1d8T : VST1D3<0b0000, "vst1", "8">; +def VST1d16T : VST1D3<0b0100, "vst1", "16">; +def VST1d32T : VST1D3<0b1000, "vst1", "32">; +//def VST1d64T : VST1D3<0b1100, "vst1", "64">; + +def VST1d8Q : VST1D4<0b0000, "vst1", "8">; +def VST1d16Q : VST1D4<0b0100, "vst1", "16">; +def VST1d32Q : VST1D4<0b1000, "vst1", "32">; +//def VST1d64Q : VST1D4<0b1100, "vst1", "64">; + + let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { // VST2 : Vector Store (multiple 2-element structures) @@ -428,8 +445,7 @@ class VST2D<bits<4> op7_4, string OpcodeStr, string Dt> class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0011,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; def VST2d8 : VST2D<0b0000, "vst2", "8">; @@ -443,6 +459,16 @@ def VST2q8 : VST2Q<0b0000, "vst2", "8">; def VST2q16 : VST2Q<0b0100, "vst2", "16">; def VST2q32 : VST2Q<0b1000, "vst2", "32">; +// These (double-spaced dreg pair) are for disassembly only. 
+class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> + : NLdSt<0, 0b00, 0b1001, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, + OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; + +def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">; +def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">; +def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">; + // VST3 : Vector Store (multiple 3-element structures) class VST3D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0100,op7_4, (outs), @@ -476,14 +502,12 @@ def VST3q32b : VST3WB<0b1000, "vst3", "32">; class VST4D<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0000,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "$addr.addr = $wb", []>; def VST4d8 : VST4D<0b0000, "vst4", "8">; @@ -511,104 +535,63 @@ def VST4q32b : VST4WB<0b1000, "vst4", "32">; // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, - OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", - "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", + "", []>; // vst2 to single-spaced registers. def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; -def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 0; -} -def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 0; -} +def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; } +def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; } // vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 1; -} -def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 1; -} +def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } +def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } // vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { - let Inst{5} = 1; -} -def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { - let Inst{6} = 1; -} +def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } +def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, - OpcodeStr, Dt, - "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; // vst3 to single-spaced registers. 
-def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { - let Inst{4} = 0; -} -def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b00; -} -def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b000; -} +def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; } +def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; } // vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b10; -} -def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b100; -} +def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } // vst3 to double-spaced odd registers. -def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { - let Inst{5-4} = 0b10; -} -def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { - let Inst{6-4} = 0b100; -} +def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt> : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, - OpcodeStr, Dt, + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", - "", []>; + "", []>; // vst4 to single-spaced registers. def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; -def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 0; -} -def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 0; -} +def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; } +def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; } // vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 1; -} -def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 1; -} +def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } +def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } // vst4 to double-spaced odd registers. -def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { - let Inst{5} = 1; -} -def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { - let Inst{6} = 1; -} +def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } +def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } } // mayStore = 1, hasExtraSrcRegAllocReq = 1 @@ -656,34 +639,26 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{ // Instruction Classes //===----------------------------------------------------------------------===// -// Basic 2-register operations, both double- and quad-register. +// Basic 2-register operations: single-, double- and quad-register. 
+class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), + IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>; class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>; -// Basic 2-register operations, scalar single-precision. -class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), - IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; - -class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> - : NEONFPPat<(ResTy (OpNode SPR:$a)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Basic 2-register intrinsics, both double- and quad-register. class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, @@ -700,21 +675,6 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>; -// Basic 2-register intrinsics, scalar single-precision -class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin, - OpcodeStr, Dt, "$dst, $src", "", []>; - -class N2VDIntsPat<SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$a)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Narrow 2-register intrinsics. 
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -742,15 +702,22 @@ class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt> class N2VQShuffle<bits<2> op19_18, bits<5> op11_7, InstrItinClass itin, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2), - (ins QPR:$src1, QPR:$src2), itin, - OpcodeStr, Dt, "$dst1, $dst2", + (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; -// Basic 3-register operations, both double- and quad-register. +// Basic 3-register operations: single-, double- and quad-register. +class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { + let isCommutable = Commutable; +} + class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -763,9 +730,9 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, - OpcodeStr, "$dst, $src1, $src2", "", - [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> { + (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, + OpcodeStr, "$dst, $src1, $src2", "", + [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{ let isCommutable = Commutable; } class N3VDSL<bits<2> op21_20, bits<4> op11_8, @@ -776,27 +743,23 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_VFP2:$src2), - imm:$lane)))))]> { + (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{ let isCommutable = 0; } class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16D, - OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), - (Ty (NEONvduplane (Ty DPR_8:$src2), - imm:$lane)))))]> { + (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> { let isCommutable = 0; } class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -805,12 +768,11 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, } class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, - ValueType ResTy, ValueType 
OpTy, - SDNode OpNode, bit Commutable> + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3VX<op24, op23, op21_20, op11_8, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, - OpcodeStr, "$dst, $src1, $src2", "", - [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> { + (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, + OpcodeStr, "$dst, $src1, $src2", "", + [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{ let isCommutable = Commutable; } class N3VQSL<bits<2> op21_20, bits<4> op11_8, @@ -825,13 +787,11 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8, imm:$lane)))))]> { let isCommutable = 0; } -class N3VQSL16<bits<2> op21_20, bits<4> op11_8, - string OpcodeStr, string Dt, +class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - IIC_VMULi16Q, - OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", + IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -839,27 +799,10 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, let isCommutable = 0; } -// Basic 3-register operations, scalar single-precision -class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDNode OpNode, bit Commutable> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, - OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { - let isCommutable = Commutable; -} -class N3VDsPat<SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; - // Basic 3-register intrinsics, both double- and quad-register. class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -891,8 +834,7 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, - Intrinsic IntOp, bit Commutable> + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 1, op4, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst, $src1, $src2", "", @@ -924,7 +866,15 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } -// Multiply-Add/Sub operations, both double- and quad-register. +// Multiply-Add/Sub operations: single-, double- and quad-register. 
+class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode OpNode> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; + class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> @@ -976,8 +926,8 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, - (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), - imm:$lane)))))))]>; + (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), + imm:$lane)))))))]>; class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, @@ -989,25 +939,8 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, - (ResTy (NEONvduplane (OpTy DPR_8:$src3), - imm:$lane)))))))]>; - -// Multiply-Add/Sub operations, scalar single-precision -class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode MulOp, SDNode OpNode> - : N3V<op24, op23, op21_20, op11_8, 0, op4, - (outs DPR_VFP2:$dst), - (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin, - OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; - -class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst> - : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), - (EXTRACT_SUBREG - (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)), - arm_ssubreg_0)>; + (ResTy (NEONvduplane (OpTy DPR_8:$src3), + imm:$lane)))))))]>; // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. @@ -1050,9 +983,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (OpTy DPR:$src2), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src3), imm:$lane)))))]>; -class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> +class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, @@ -1063,7 +996,6 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass iti (OpTy (NEONvduplane (OpTy DPR_8:$src3), imm:$lane)))))]>; - // Narrowing 3-register intrinsics. 
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, @@ -1095,9 +1027,9 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, (ResTy (IntOp (OpTy DPR:$src1), (OpTy (NEONvduplane (OpTy DPR_VFP2:$src2), imm:$lane)))))]>; -class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - Intrinsic IntOp> +class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", @@ -1249,6 +1181,45 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // S = single int (32 bit) elements // D = double int (64 bit) elements +// Neon 2-register vector operations -- for disassembly only. + +// First with only element sizes of 8, 16 and 32 bits: +multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, string opc, string Dt, + string asm> { + // 64-bit vector types. + def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "8"), asm, "", []>; + def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "16"), asm, "", []>; + def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, !strconcat(Dt, "32"), asm, "", []>; + def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + opc, "f32", asm, "", []> { + let Inst{10} = 1; // overwrite F = 1 + } + + // 128-bit vector types. + def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "8"), asm, "", []>; + def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "16"), asm, "", []>; + def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, !strconcat(Dt, "32"), asm, "", []>; + def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + opc, "f32", asm, "", []> { + let Inst{10} = 1; // overwrite F = 1 + } +} + // Neon 3-register vector operations. // First with only element sizes of 8, 16 and 32 bits: @@ -1262,22 +1233,22 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, OpNode, Commutable>; def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16, - OpcodeStr, !strconcat(Dt, "16"), - v4i16, v4i16, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "16"), + v4i16, v4i16, OpNode, Commutable>; def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32, - OpcodeStr, !strconcat(Dt, "32"), - v2i32, v2i32, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "32"), + v2i32, v2i32, OpNode, Commutable>; // 128-bit vector types. 
def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16, - OpcodeStr, !strconcat(Dt, "8"), - v16i8, v16i8, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, OpNode, Commutable>; def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), - v8i16, v8i16, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "16"), + v8i16, v8i16, OpNode, Commutable>; def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32, - OpcodeStr, !strconcat(Dt, "32"), - v4i32, v4i32, OpNode, Commutable>; + OpcodeStr, !strconcat(Dt, "32"), + v4i32, v4i32, OpNode, Commutable>; } multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> { @@ -1372,7 +1343,7 @@ multiclass N3VIntSL_HS<bits<4> op11_8, def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; } @@ -1386,8 +1357,8 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, OpcodeStr, Dt, IntOp, Commutable> { def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16, - OpcodeStr, !strconcat(Dt, "8"), - v8i8, v8i8, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, IntOp, Commutable>; def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp, Commutable>; @@ -1402,11 +1373,11 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32, OpcodeStr, Dt, IntOp, Commutable> { def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32, - OpcodeStr, !strconcat(Dt, "64"), - v1i64, v1i64, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "64"), + v1i64, v1i64, IntOp, Commutable>; def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32, - OpcodeStr, !strconcat(Dt, "64"), - v2i64, v2i64, IntOp, Commutable>; + OpcodeStr, !strconcat(Dt, "64"), + v2i64, v2i64, IntOp, Commutable>; } @@ -1511,9 +1482,11 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, + mul, ShOp>; def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, - OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, + mul, ShOp>; } // Neon 3-argument intrinsics, @@ -1522,19 +1495,19 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, - OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; + OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, - OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D, - OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; + OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. 
def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q, - OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; + OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q, - OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q, - OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1576,17 +1549,17 @@ multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; + itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>; def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, - itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; + itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>; // 128-bit vector types. def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>; def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>; def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4, - itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; + itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>; } @@ -1846,29 +1819,31 @@ def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8", def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32", - v2f32, v2f32, fmul, 1>; + v2f32, v2f32, fmul, 1>; def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32", - v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; -def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; -def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; + v4f32, v4f32, fmul, 1>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, + v2f32, fmul>; + def : Pat<(v8i16 (mul (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (mul (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), (v4f32 (VMULslfq (v4f32 QPR:$src1), (v2f32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VQDMULH : Vector Saturating Doubling Multiply Returning High Half @@ -1883,14 +1858,14 @@ def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 
QPR:$src1), imm:$lane)))), (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half @@ -1905,14 +1880,14 @@ def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), imm:$lane)))), (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) @@ -1950,30 +1925,28 @@ def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", v4f32, v2f32, fmul, fadd>; def : Pat<(v8i16 (add (v8i16 QPR:$src1), - (mul (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), - (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), - (v8i16 QPR:$src2), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), (v4i16 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (add (v4i32 QPR:$src1), - (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), - (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), - (v4i32 QPR:$src2), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), (v2i32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), - (fmul (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (fmul (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), (v4f32 (VMLAslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) @@ -2003,30 +1976,27 @@ def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", v4f32, v2f32, fmul, fsub>; def : Pat<(v8i16 (sub (v8i16 QPR:$src1), - (mul (v8i16 QPR:$src2), - (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), - (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), - (v8i16 QPR:$src2), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), (v4i16 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (sub (v4i32 QPR:$src1), - (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), - (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), - (v4i32 QPR:$src2), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + 
(v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), (v2i32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), - (fmul (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), - (v4f32 (VMLSslfq (v4f32 QPR:$src1), - (v4f32 QPR:$src2), + (fmul (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) @@ -2088,6 +2058,10 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; +// For disassembly only. +defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", + "$dst, $src, #0">; + // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; @@ -2097,6 +2071,13 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, NEONvcge, 0>; +// For disassembly only. +defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", + "$dst, $src, #0">; +// For disassembly only. +defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", + "$dst, $src, #0">; + // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; @@ -2106,6 +2087,13 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, NEONvcgt, 0>; +// For disassembly only. +defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", + "$dst, $src, #0">; +// For disassembly only. +defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", + "$dst, $src, #0">; + // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; @@ -2247,9 +2235,9 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. 
// VMAX : Vector Maximum -defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; -defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; @@ -2257,9 +2245,9 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum -defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>; -defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, +defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>; def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; @@ -2401,16 +2389,17 @@ def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", + NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>; + IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>; defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>; + IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -2418,14 +2407,14 @@ defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>; + IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>; defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>; + IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, 
IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", @@ -2438,10 +2427,10 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift -defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, +defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqrshl", "s", int_arm_neon_vqrshifts, 0>; -defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, +defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqrshl", "u", int_arm_neon_vqrshiftu, 0>; @@ -2508,7 +2497,7 @@ def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) -def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, +def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, "vneg", "f32", "$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; @@ -2547,6 +2536,14 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; +// Vector Swap -- for disassembly only. +def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, + (outs DPR:$dst), (ins DPR:$src), NoItinerary, + "vswp", "$dst, $src", "", []>; +def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, + (outs QPR:$dst), (ins QPR:$src), NoItinerary, + "vswp", "$dst, $src", "", []>; + // Vector Move Operations. // VMOV : Vector Move (Register) @@ -2678,10 +2675,10 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane))>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), - (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)), + (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), - (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)), + (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; @@ -2849,11 +2846,13 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, - (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))), + (i64 (EXTRACT_SUBREG QPR:$src, + (DSubReg_f64_reg imm:$lane))), (DSubReg_f64_other_reg imm:$lane))>; def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, - (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))), + (f64 (EXTRACT_SUBREG QPR:$src, + (DSubReg_f64_reg imm:$lane))), (DSubReg_f64_other_reg imm:$lane))>; // VMOVN : Vector Narrowing Move @@ -3092,70 +3091,110 @@ def VTBX4 // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// +class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst> + : NEONFPPat<(ResTy (OpNode SPR:$a)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSPat<SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 
(IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + +class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst> + : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), + (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$acc, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$a, arm_ssubreg_0), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), + SPR:$b, arm_ssubreg_0)), + arm_ssubreg_0)>; + // These need separate instructions because they must use DPR_VFP2 register // class which have SPR sub-registers. // Vector Add Operations used for single-precision FP let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>; -def : N3VDsPat<fadd, VADDfd_sfp>; +def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>; +def : N3VSPat<fadd, VADDfd_sfp>; // Vector Sub Operations used for single-precision FP let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>; -def : N3VDsPat<fsub, VSUBfd_sfp>; +def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>; +def : N3VSPat<fsub, VSUBfd_sfp>; // Vector Multiply Operations used for single-precision FP let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>; -def : N3VDsPat<fmul, VMULfd_sfp>; +def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>; +def : N3VSPat<fmul, VMULfd_sfp>; // Vector Multiply-Accumulate/Subtract used for single-precision FP // vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so // we want to avoid them for now. e.g., alternating vmla/vadd instructions. 
//let neverHasSideEffects = 1 in -//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>; -//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>; +//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32", +// v2f32, fmul, fadd>; +//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>; //let neverHasSideEffects = 1 in -//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>; -//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>; +//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32", +// v2f32, fmul, fsub>; +//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>; // Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in -def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs", "f32", - v2f32, v2f32, int_arm_neon_vabs>; -def : N2VDIntsPat<fabs, VABSfd_sfp>; +def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, + "vabs", "f32", "$dst, $src", "", []>; +def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>; // Vector Negate used for single-precision FP let neverHasSideEffects = 1 in -def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, - (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vneg", "f32", "$dst, $src", "", []>; -def : N2VDIntsPat<fneg, VNEGf32d_sfp>; +def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, + (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, + "vneg", "f32", "$dst, $src", "", []>; +def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>; + +// Vector Maximum used for single-precision FP +let neverHasSideEffects = 1 in +def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + "vmax", "f32", "$dst, $src1, $src2", "", []>; +def : N3VSPat<NEONfmax, VMAXfd_sfp>; + +// Vector Minimum used for single-precision FP +let neverHasSideEffects = 1 in +def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + "vmin", "f32", "$dst, $src1, $src2", "", []>; +def : N3VSPat<NEONfmin, VMINfd_sfp>; // Vector Convert between single-precision FP and integer let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", - v2i32, v2f32, fp_to_sint>; -def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; +def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v2i32, v2f32, fp_to_sint>; +def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>; let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", - v2i32, v2f32, fp_to_uint>; -def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; +def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v2i32, v2f32, fp_to_uint>; +def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>; let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", - v2f32, v2i32, sint_to_fp>; -def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; +def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v2f32, v2i32, sint_to_fp>; +def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>; let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", - v2f32, v2i32, uint_to_fp>; -def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; +def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", 
"f32.u32", + v2f32, v2i32, uint_to_fp>; +def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 1dcea26..786dd65 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -120,7 +120,10 @@ def t_addrmode_sp : Operand<i32>, // Miscellaneous Instructions. // -let Defs = [SP], Uses = [SP] in { +// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE +// from removing one half of the matched pairs. That breaks PEI, which assumes +// these will always be in pairs, and asserts if it finds otherwise. Better way? +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def tADJCALLSTACKUP : PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, "@ tADJCALLSTACKUP $amt1", @@ -132,6 +135,76 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>; } +def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00000000; +} + +def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00010000; +} + +def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00100000; +} + +def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b00110000; +} + +def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b11; + let Inst{7-0} = 0b01000000; +} + +def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101101> { + let Inst{9-5} = 0b10010; + let Inst{3} = 1; +} + +def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101101> { + let Inst{9-5} = 0b10010; + let Inst{3} = 0; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val", + [/* For disassembly only; pattern left blank */]>, + T1Encoding<0b101111> { + let Inst{9-8} = 0b10; +} + +// Change Processor State is a system instruction -- for disassembly only. +// The singleton $opt operand contains the following information: +// opt{4-0} = mode ==> don't care +// opt{5} = changemode ==> 0 (false for 16-bit Thumb instr) +// opt{8-6} = AIF from Inst{2-0} +// opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable +// +// The opt{4-0} and opt{5} sub-fields are to accommodate 32-bit Thumb and ARM +// CPS which has more options. +def tCPS : T1I<(outs), (ins i32imm:$opt), NoItinerary, "cps${opt:cps}", + [/* For disassembly only; pattern left blank */]>, + T1Misc<0b0110011>; + // For both thumb1 and thumb2. 
let isNotDuplicable = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, @@ -200,7 +273,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { let Inst{6-3} = 0b1110; // Rm = lr } // Alternative return instruction used by vararg functions. - def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>, + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target",[]>, T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25 } @@ -228,20 +301,20 @@ let isCall = 1, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsNotDarwin]>; // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<0b11110, 0b11, 0, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>; // Also used for Thumb2 - def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, + def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, "blx\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>, @@ -249,7 +322,7 @@ let isCall = 1, // ARMv4T def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?, - (outs), (ins tGPR:$func, variable_ops), IIC_Br, + (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsNotDarwin]>; @@ -263,20 +336,20 @@ let isCall = 1, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 def tBLr9 : TIx2<0b11110, 0b11, 1, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsDarwin]>; // ARMv5T and above, also used for Thumb2 def tBLXi_r9 : TIx2<0b11110, 0b11, 0, - (outs), (ins i32imm:$func, variable_ops), IIC_Br, + (outs), (ins i32imm:$func, variable_ops), IIC_Br, "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>; // Also used for Thumb2 - def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, + def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, "blx\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>, @@ -284,7 +357,7 @@ let isCall = 1, // ARMv4T def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?, - (outs), (ins tGPR:$func, variable_ops), IIC_Br, + (outs), (ins tGPR:$func, variable_ops), IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsDarwin]>; @@ -299,7 +372,7 @@ let isBranch = 1, isTerminator = 1 in { // Far jump let Defs = [LR] in - def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, + def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br, "bl\t$target\t@ far jump",[]>; def tBR_JTr : T1JTI<(outs), @@ -332,16 +405,34 @@ let isBranch = 1, isTerminator = 1 in { T1Misc<{1,0,?,1,?,?,?}>; } +// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only +// A8.6.16 B: Encoding T1 +// If Inst{11-8} == 0b1111 then SEE SVC +let isCall = 1 in { +def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>, + Encoding16 { + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1111; +} +} + +// A8.6.16 B: Encoding T1 -- for disassembly only +// If Inst{11-8} == 0b1110 then 
UNDEFINED +def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 { + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1110; +} + //===----------------------------------------------------------------------===// // Load Store Instructions. // -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in -def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, +let canFoldAsLoad = 1, isReMaterializable = 1 in +def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>, T1LdSt<0b100>; -def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, +def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", []>, T1LdSt4Imm<{1,?,?}>; @@ -391,15 +482,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", ".n\t$dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>, T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59 // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>, T1LdStSP<{1,?,?}>; @@ -644,7 +734,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, // multiply register let isCommutable = 1 in def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, - "mul", "\t$dst, $rhs", + "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */ [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>, T1DataProcessing<0b1101>; @@ -761,7 +851,7 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, T1Misc<{0,0,1,0,1,0,?}>; -// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. +// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation. // Expanded after instruction selection into a branch sequence. let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 55c7aa2..6241766 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -13,7 +13,7 @@ // IT block predicate field def it_pred : Operand<i32> { - let PrintMethod = "printPredicateOperand"; + let PrintMethod = "printMandatoryPredicateOperand"; } // IT block condition mask @@ -53,10 +53,10 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11]. def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; + return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; }]>; -// t2_so_imm_not - Match an immediate that is a complement +// t2_so_imm_not - Match an immediate that is a complement // of a t2_so_imm. 
def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{ @@ -114,13 +114,13 @@ def imm0_4095 : Operand<i32>, return (uint32_t)N->getZExtValue() < 4096; }]>; -def imm0_4095_neg : PatLeaf<(i32 imm), [{ - return (uint32_t)(-N->getZExtValue()) < 4096; -}], imm_neg_XFORM>; +def imm0_4095_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)(-N->getZExtValue()) < 4096; +}], imm_neg_XFORM>; def imm0_255_neg : PatLeaf<(i32 imm), [{ return (uint32_t)(-N->getZExtValue()) < 255; -}], imm_neg_XFORM>; +}], imm_neg_XFORM>; // Define Thumb2 specific addressing modes. @@ -131,7 +131,7 @@ def t2addrmode_imm12 : Operand<i32>, let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } -// t2addrmode_imm8 := reg - imm8 +// t2addrmode_imm8 := reg +/- imm8 def t2addrmode_imm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand"; @@ -208,7 +208,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode, /// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a // binary operation that produces a value. These are predicable and can be /// changed to modify CPSR. -multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, +multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0, string wide =""> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, @@ -368,15 +368,16 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode, } /// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns -/// for a binary operation that produces a value and use and define the carry +/// for a binary operation that produces a value and use the carry /// bit. It's not predicable. let Uses = [CPSR] in { -multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { +multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, opc, "\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -387,7 +388,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -401,19 +402,23 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUnused]> { + Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; let Inst{20} = 0; // The S bit. 
} - // Carry setting variants +} + +// Carry setting variants +let Defs = [CPSR] in { +multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { // shifted imm - def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - !strconcat(opc, "s\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, + opc, "\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, + Requires<[IsThumb2]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -421,11 +426,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm let Inst{15} = 0; } // register - def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, + opc, ".w\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, + Requires<[IsThumb2]> { let isCommutable = Commutable; let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -436,11 +440,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm let Inst{5-4} = 0b00; // type } // shifted register - def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), - [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, - Requires<[IsThumb2, CarryDefIsUsed]> { - let Defs = [CPSR]; + def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, + opc, ".w\t$dst, $lhs, $rhs", + [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, + Requires<[IsThumb2]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -448,6 +451,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm } } } +} /// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit. let Defs = [CPSR] in { @@ -626,19 +630,6 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { } } -/// T2I_picld - Defines the PIC load pattern. -class T2I_picld<string opc, PatFrag opnode> : - T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi, - !strconcat("\n${addr:label}:\n\t", opc), "\t$dst, $addr", - [(set GPR:$dst, (opnode addrmodepc:$addr))]>; - -/// T2I_picst - Defines the PIC store pattern. -class T2I_picst<string opc, PatFrag opnode> : - T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer, - !strconcat("\n${addr:label}:\n\t", opc), "\t$src, $addr", - [(opnode GPR:$src, addrmodepc:$addr)]>; - - /// T2I_unary_rrot - A unary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { @@ -666,6 +657,57 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } +// SXTB16 and UXTB16 do not need the .w qualifier. 
+multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, "\t$dst, $src", + [(set GPR:$dst, (opnode GPR:$src))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, "\t$dst, $src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + +// DO variant - disassembly only, no pattern + +multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, "\t$dst, $src", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, "\t$dst, $src, ror $rot", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + /// T2I_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { @@ -692,6 +734,29 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } +// DO variant - disassembly only, no pattern + +multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> { + def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, + opc, "\t$dst, $LHS, $RHS", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -734,7 +799,7 @@ def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), let Inst{19-16} = 0b1101; // Rn = sp let Inst{15} = 0; } -def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), +def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> { let Inst{31-27} = 0b11110; let Inst{25} = 1; @@ -787,6 +852,25 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), let Inst{15} = 0; } +// Signed and unsigned division, for disassembly only +def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, + "sdiv", "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b011100; + let Inst{20} = 0b1; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b1111; +} + +def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi, + "udiv", "\t$dst, $a, $b", 
[]> { + let Inst{31-27} = 0b11111; + let Inst{26-21} = 0b011101; + let Inst{20} = 0b1; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b1111; +} + // Pseudo instruction that will expand into a t2SUBrSPi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -803,7 +887,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), // // Load -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>; // Loads with zero extension @@ -925,10 +1009,32 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), []>; } +// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are +// for disassembly only. +// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 +class T2IldT<bit signed, bits<2> type, string opc> + : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$dst, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 1; // load + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW. +} + +def t2LDRT : T2IldT<0, 0b10, "ldrt">; +def t2LDRBT : T2IldT<0, 0b00, "ldrbt">; +def t2LDRHT : T2IldT<0, 0b01, "ldrht">; +def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">; +def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">; + // Store -defm t2STR : T2I_st<0b10, "str", BinOpFrag<(store node:$LHS, node:$RHS)>>; -defm t2STRB : T2I_st<0b00, "strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; -defm t2STRH : T2I_st<0b01, "strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; +defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; +defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in @@ -979,9 +1085,98 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; +// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly +// only. +// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 +class T2IstT<bits<2> type, string opc> + : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc, + "\t$src, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = 0; // not signed + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 0; // store + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW +} + +def t2STRT : T2IstT<0b10, "strt">; +def t2STRBT : T2IstT<0b00, "strbt">; +def t2STRHT : T2IstT<0b01, "strht">; // FIXME: ldrd / strd pre / post variants +// T2Ipl (Preload Data/Instruction) signals the memory system of possible future +// data/instruction access. These are for disassembly only. 
+multiclass T2Ipl<bit instr, bit write, string opc> { + + def i12 : T2I<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 1; // U = 1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + } + + def i8 : T2I<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // U = 0 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-8} = 0b1100; + } + + // A8.6.118 #0 and #-0 differs. Translates -0 to -1, -1 to -2, ..., etc. + def pci : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc, + "\t[pc, ${imm:negzero}]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = ?; // add = (U == 1) + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{19-16} = 0b1111; // Rn = 0b1111 + let Inst{15-12} = 0b1111; + } + + def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc, + "\t[$base, $a]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + let Inst{5-4} = 0b00; // no shift is applied + } + + def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc, + "\t[$base, $a, lsl $shamt]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + } +} + +defm t2PLD : T2Ipl<0, 0, "pld">; +defm t2PLDW : T2Ipl<0, 1, "pldw">; +defm t2PLI : T2Ipl<1, 0, "pli">; + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
// @@ -989,7 +1184,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { + IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' @@ -1001,7 +1196,7 @@ def t2LDM : T2XI<(outs), let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { + IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; let Inst{24-23} = {?, ?}; // IA: '01', DB: '10' @@ -1074,13 +1269,15 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; defm t2SXTH : T2I_unary_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; +defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">; defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; +defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">; -// TODO: SXT(A){B|H}16 +// TODO: SXT(A){B|H}16 - done for disassembly only // Zero extenders @@ -1089,7 +1286,7 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; defm t2UXTH : T2I_unary_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot<0b011, "uxtb16", +defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), @@ -1101,6 +1298,7 @@ defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">; } //===----------------------------------------------------------------------===// @@ -1119,9 +1317,13 @@ defm t2SUBS : T2I_bin_s_irs <0b1101, "sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", - BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; + BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", - BinOpFrag<(sube node:$LHS, node:$RHS)>>; + BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; +defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc", + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; // RSB defm t2RSB : T2I_rbin_is <0b1110, "rsb", @@ -1138,6 +1340,155 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +// Select Bytes -- for disassembly only + +def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel", + "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-24} = 0b010; + let Inst{23} = 0b1; + let Inst{22-20} = 0b010; + let Inst{15-12} = 0b1111; + let Inst{7} = 0b1; + let Inst{6-4} = 0b000; +} + +// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned) +// And Miscellaneous operations -- for disassembly only +class T2I_pam<bits<3> op22_20, bits<4> op7_4, string 
opc> + : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc, + "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0101; + let Inst{22-20} = op22_20; + let Inst{15-12} = 0b1111; + let Inst{7-4} = op7_4; +} + +// Saturating add/subtract -- for disassembly only + +def t2QADD : T2I_pam<0b000, 0b1000, "qadd">; +def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">; +def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">; +def t2QASX : T2I_pam<0b010, 0b0001, "qasx">; +def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">; +def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">; +def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">; +def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">; +def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">; +def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">; +def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">; +def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">; +def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">; +def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">; +def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">; +def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">; + +// Signed/Unsigned add/subtract -- for disassembly only + +def t2SASX : T2I_pam<0b010, 0b0000, "sasx">; +def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">; +def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">; +def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">; +def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">; +def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">; +def t2UASX : T2I_pam<0b010, 0b0100, "uasx">; +def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">; +def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">; +def t2USAX : T2I_pam<0b110, 0b0100, "usax">; +def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">; +def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">; + +// Signed/Unsigned halving add/subtract -- for disassembly only + +def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">; +def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">; +def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">; +def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">; +def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">; +def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">; +def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">; +def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">; +def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">; +def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">; +def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">; +def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">; + +// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only + +def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + NoItinerary, "usad8", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8", + "\t$dst, $a, $b, $acc", []>; + +// Signed/Unsigned saturate -- for disassembly only + +def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 0; // sh = '0' +} + +def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' +} + +def t2SSAT16 : T2I<(outs 
GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, + "ssat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1100; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' + let Inst{14-12} = 0b000; // imm3 = '000' + let Inst{7-6} = 0b00; // imm2 = '00' +} + +def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 0; // sh = '0' +} + +def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt), + NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' +} + +def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary, + "usat16", "\t$dst, $bit_pos, $a", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{25-22} = 0b1110; + let Inst{20} = 0; + let Inst{15} = 0; + let Inst{21} = 1; // sh = '1' + let Inst{14-12} = 0b000; // imm3 = '000' + let Inst{7-6} = 0b00; // imm2 = '00' +} //===----------------------------------------------------------------------===// // Shift and rotate Instructions. @@ -1342,6 +1693,8 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, } } // neverHasSideEffects +// Rounding variants of the below included for disassembly only + // Most significant word multiply def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, "smmul", "\t$dst, $a, $b", @@ -1353,6 +1706,15 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, + "smmulr", "\t$dst, $a, $b", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b101; + let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate) + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} + def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> { @@ -1363,6 +1725,14 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, + "smmlar", "\t$dst, $a, $b, $c", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b101; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", @@ -1374,6 +1744,15 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0) } +def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, + "smmlsr", "\t$dst, $a, $b, $c", []> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0110; + let Inst{22-20} = 0b110; + let Inst{15-12} = {?, ?, ?, ?}; // Ra + let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1) +} + multiclass T2I_smul<string opc, PatFrag opnode> { def BB : T2I<(outs 
GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", @@ -1466,7 +1845,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), - (sra GPR:$b, (i32 16)))))]> { + (sra GPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1490,7 +1869,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16)))))]> { + (sra GPR:$b, (i32 16)))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -1502,7 +1881,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sext_inreg GPR:$b, i16)), (i32 16))))]> { + (sext_inreg GPR:$b, i16)), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1514,7 +1893,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, - (sra GPR:$b, (i32 16))), (i32 16))))]> { + (sra GPR:$b, (i32 16))), (i32 16))))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -1527,16 +1906,70 @@ multiclass T2I_smla<string opc, PatFrag opnode> { defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; -// TODO: Halfword multiple accumulate long: SMLAL<x><y> -// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD - +// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only +def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; +def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b", + [/* For disassembly only; pattern left blank */]>; + +// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD +// These are for disassembly only. 
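As a quick orientation for the dual halfword multiplies defined below, here is a rough C++ reference model of the non-exchanging forms. It is an illustrative sketch, not part of the patch: saturation and Q-flag behaviour are omitted, and the X variants (smuadx, smladx, and so on) would first swap the two halfwords of the second operand.

  #include <cstdint>

  // Illustrative reference model of the dual halfword multiplies; not part of the patch.
  static int16_t lo16(uint32_t v) { return (int16_t)(v & 0xFFFF); }  // bits 15-0 as signed
  static int16_t hi16(uint32_t v) { return (int16_t)(v >> 16); }     // bits 31-16 as signed

  // smuad: sum of the two signed halfword products (the real instruction also sets Q on overflow).
  uint32_t smuad(uint32_t rn, uint32_t rm) {
    int64_t sum = (int64_t)lo16(rn) * lo16(rm) + (int64_t)hi16(rn) * hi16(rm);
    return (uint32_t)sum;
  }

  // smusd: bottom-half product minus top-half product.
  uint32_t smusd(uint32_t rn, uint32_t rm) {
    int64_t diff = (int64_t)lo16(rn) * lo16(rm) - (int64_t)hi16(rn) * hi16(rm);
    return (uint32_t)diff;
  }

  // smlad / smlsd: the same products folded into a 32-bit accumulator;
  // smlald / smlsld accumulate into a 64-bit register pair instead.
  uint32_t smlad(uint32_t rn, uint32_t rm, uint32_t ra) { return smuad(rn, rm) + ra; }
  uint32_t smlsd(uint32_t rn, uint32_t rm, uint32_t ra) { return smusd(rn, rm) + ra; }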
+ +def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> { + let Inst{15-12} = 0b1111; +} +def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad", + "\t$dst, $a, $b, $acc", []>; +def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx", + "\t$dst, $a, $b, $acc", []>; +def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd", + "\t$dst, $a, $b, $acc", []>; +def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), + (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx", + "\t$dst, $a, $b, $acc", []>; +def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld", + "\t$ldst, $hdst, $a, $b", []>; +def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst), + (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx", + "\t$ldst, $hdst, $a, $b", []>; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. // -class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list<dag> pattern> +class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> : T2I<oops, iops, itin, opc, asm, pattern> { let Inst{31-27} = 0b11111; let Inst{26-22} = 0b01010; @@ -1572,7 +2005,7 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, (shl GPR:$src, (i32 8))), i16))]>; def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]> { @@ -1590,7 +2023,7 @@ def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]> { @@ -1643,7 +2076,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. :( +// a two-value operand where a dag node expects two operands. 
:( def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, @@ -1723,6 +2156,66 @@ def t2Int_SyncBarrierV7 : AInoP<(outs), (ins), } } +// Helper class for multiclass T2MemB -- for disassembly only +class T2I_memb<string opc, string asm> + : T2I<(outs), (ins), NoItinerary, opc, asm, + [/* For disassembly only; pattern left blank */]>, + Requires<[IsThumb2, HasV7]> { + let Inst{31-20} = 0xf3b; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +multiclass T2MemB<bits<4> op7_4, string opc> { + + def st : T2I_memb<opc, "\tst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1110; + } + + def ish : T2I_memb<opc, "\tish"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1011; + } + + def ishst : T2I_memb<opc, "\tishst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b1010; + } + + def nsh : T2I_memb<opc, "\tnsh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0111; + } + + def nshst : T2I_memb<opc, "\tnshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0110; + } + + def osh : T2I_memb<opc, "\tosh"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0011; + } + + def oshst : T2I_memb<opc, "\toshst"> { + let Inst{7-4} = op7_4; + let Inst{3-0} = 0b0010; + } +} + +// These DMB variants are for disassembly only. +defm t2DMB : T2MemB<0b0101, "dmb">; + +// These DSB variants are for disassembly only. +defm t2DSB : T2MemB<0b0100, "dsb">; + +// ISB has only full system option -- for disassembly only +def t2ISBsy : T2I_memb<"isb", ""> { + let Inst{7-4} = 0b0110; + let Inst{3-0} = 0b1111; +} + class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern, bits<4> rt2 = 0b1111> @@ -1789,6 +2282,16 @@ def t2STREXD : T2I_strex<0b11, (outs GPR:$success), {?, ?, ?, ?}>; } +// Clear-Exclusive is for disassembly only. +def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + let Inst{31-20} = 0xf3b; + let Inst{15-14} = 0b10; + let Inst{12} = 0; + let Inst{7-4} = 0b0010; +} + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -1906,6 +2409,24 @@ def t2TBH : let Inst{15-8} = 0b11110000; let Inst{7-4} = 0b0001; // H form } + +// Generic versions of the above two instructions, for disassembly only + +def t2TBBgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br, + "tbb", "\t[$a, $b]", []>{ + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{15-8} = 0b11110000; + let Inst{7-4} = 0b0000; // B form +} + +def t2TBHgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br, + "tbh", "\t[$a, $b, lsl #1]", []> { + let Inst{31-27} = 0b11101; + let Inst{26-20} = 0b0001101; + let Inst{15-8} = 0b11110000; + let Inst{7-4} = 0b0001; // H form +} } // isNotDuplicable, isIndirectBranch } // isBranch, isTerminator, isBarrier @@ -1931,6 +2452,119 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), let Inst{15-8} = 0b10111111; } +// Branch and Exchange Jazelle -- for disassembly only +// Rm = Inst{19-16} +def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-20} = 0b111100; + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// Change Processor State is a system instruction -- for disassembly only. 
+// The singleton $opt operand contains the following information:
+// opt{4-0} = mode from Inst{4-0}
+// opt{5} = changemode from Inst{17}
+// opt{8-6} = AIF from Inst{8-6}
+// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
+def t2CPS : T2XI<(outs),(ins i32imm:$opt), NoItinerary, "cps${opt:cps}",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111010;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+// Helper class for disassembly only.
+class T2I_hint<bits<8> op7_0, string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-0} = op7_0;
+}
+
+def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
+def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
+def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
+def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
+def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+
+def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-4} = 0b1111;
+}
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+}
+
+// Store Return State is a system instruction -- for disassembly only
+def t2SRSDBW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000010; // W = 1
+}
+
+def t2SRSDB : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000000; // W = 0
+}
+
+def t2SRSIAW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011010; // W = 1
+}
+
+def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011000; // W = 0
+}
+
+// Return From Exception is a system instruction -- for disassembly only
+def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000011; // W = 1
+}
+
+def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000001; // W = 0
+}
+
+def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011011; // W = 1
+}
+
+def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011001;
// W = 0 +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -1970,9 +2604,59 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and // scheduling. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +// Rd = Instr{11-8} +def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11111; + let Inst{20} = 0; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// Rd = Instr{11-8} +def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11111; + let Inst{20} = 1; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// FIXME: mask is ignored for the time being. +// Rn = Inst{19-16} +def t2MSR : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11100; + let Inst{20} = 0; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} + +// FIXME: mask is ignored for the time being. +// Rn = Inst{19-16} +def t2MSRsys : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tspsr, $src", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-27} = 0b11110; + let Inst{26} = 0; + let Inst{25-21} = 0b11100; + let Inst{20} = 1; // The R bit. + let Inst{15-14} = 0b10; + let Inst{12} = 0; +} diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 365e1e3..7c117ed 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -54,7 +54,7 @@ def vfp_f64imm : Operand<f64>, // Load / store Instructions. 
// -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; @@ -412,6 +412,101 @@ def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010, let Inst{7} = 0; // Z bit } +// Convert between floating-point and fixed-point +// Data type for fixed-point naming convention: +// S16 (U=0, sx=0) -> SH +// U16 (U=1, sx=0) -> UH +// S32 (U=0, sx=1) -> SL +// U32 (U=1, sx=1) -> UL + +let Constraints = "$a = $dst" in { + +// FP to Fixed-Point: + +def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +// Fixed-Point to FP: + +def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, + (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0, + (outs DPR:$dst), (ins 
DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1, + (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", + [/* For disassembly only; pattern left blank */]>; + +} // End of 'let Constraints = "$src = $dst" in' + //===----------------------------------------------------------------------===// // FP FMA Operations. // diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index bef5a06..6db6ba4 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -48,7 +48,13 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; // write our own wrapper, which does things our way, so we have complete // control over register saving and restoring. extern "C" { -#if defined(__arm__) + // We don't need this on Android (generally on hand-held devices). This + // function is for the purpose of supporting "lazy symbol lookup" (lookup + // undefined symbol at runtime) (Actually, if you tried to remove the + // !defined(ANDROID) guard, you'll get compilation error since Android's + // toolchain choose armv5te as its CPU architecture which does not support + // instruction 'stmdb' and 'ldmia' within the function) +#if defined(__arm__) && !defined(ANDROID) void ARMCompilationCallback(); asm( ".text\n" @@ -60,7 +66,7 @@ extern "C" { // whole compilation callback doesn't exist as far as the caller is // concerned, so we can't just preserve the callee saved regs. "stmdb sp!, {r0, r1, r2, r3, lr}\n" -#ifndef __SOFTFP__ +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif // The LR contains the address of the stub function on entry. @@ -83,7 +89,7 @@ extern "C" { // 6-20 | D0..D7 | Saved VFP registers // +--------+ // -#ifndef __SOFTFP__ +#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) // Restore VFP caller-saved registers. "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" #endif @@ -318,6 +324,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, *((intptr_t*)RelocPos) |= ResultPtr; break; } + case ARM::reloc_arm_movw: { + ResultPtr = ResultPtr & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + break; + } + case ARM::reloc_arm_movt: { + ResultPtr = (ResultPtr >> 16) & 0xFFFF; + *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; + *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12 + break; + } } } } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index b78b95b..19f1e3b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -350,7 +350,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, : ARMRegisterInfo::getRegisterNumbering(Reg); // AM4 - register numbers in ascending order. // AM5 - consecutive register numbers in ascending order. 
- if (NewOffset == Offset + (int)Size && + if (Reg != ARM::SP && + NewOffset == Offset + (int)Size && ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { Offset += Size; PRegNum = RegNum; @@ -747,11 +748,24 @@ static bool isMemoryOp(const MachineInstr *MI) { if (MMO->isVolatile()) return false; - // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not. + // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is + // not. if (MMO->getAlignment() < 4) return false; } + // str <undef> could probably be eliminated entirely, but for now we just want + // to avoid making a mess of it. + // FIXME: Use str <undef> as a wildcard to enable better stm folding. + if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && + MI->getOperand(0).isUndef()) + return false; + + // Likewise don't mess with references to undefined addresses. + if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && + MI->getOperand(1).isUndef()) + return false; + int Opcode = MI->getOpcode(); switch (Opcode) { default: break; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h index 2cc2950..86e7206 100644 --- a/lib/Target/ARM/ARMRelocations.h +++ b/lib/Target/ARM/ARMRelocations.h @@ -47,7 +47,13 @@ namespace llvm { reloc_arm_pic_jt, // reloc_arm_branch - Branch address relocation. - reloc_arm_branch + reloc_arm_branch, + + // reloc_arm_movt - MOVT immediate relocation. + reloc_arm_movt, + + // reloc_arm_movw - MOVW immediate relocation. + reloc_arm_movw }; } } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 426862c..622034b 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -33,7 +33,7 @@ UseMOVT("arm-use-movt", ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) - : ARMArchVersion(V4T) + : ARMArchVersion(V4) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(UseNEONFP) , IsThumb(isT) @@ -54,6 +54,11 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, // Parse features string. CPUString = ParseSubtargetFeatures(FS, CPUString); + // When no arch is specified either by CPU or by attributes, make the default + // ARMv4T. + if (CPUString == "generic" && (FS.empty() || FS == "generic")) + ARMArchVersion = V4T; + // Set the boolean corresponding to the current target triple, or the default // if one cannot be determined, to true. unsigned Len = TT.length(); @@ -68,25 +73,28 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, } if (Idx) { unsigned SubVer = TT[Idx]; - if (SubVer > '4' && SubVer <= '9') { - if (SubVer >= '7') { - ARMArchVersion = V7A; - } else if (SubVer == '6') { - ARMArchVersion = V6; - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') - ARMArchVersion = V6T2; - } else if (SubVer == '5') { - ARMArchVersion = V5T; - if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') - ARMArchVersion = V5TE; - } - if (ARMArchVersion >= V6T2) - ThumbMode = Thumb2; + if (SubVer >= '7' && SubVer <= '9') { + ARMArchVersion = V7A; + } else if (SubVer == '6') { + ARMArchVersion = V6; + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') + ARMArchVersion = V6T2; + } else if (SubVer == '5') { + ARMArchVersion = V5T; + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') + ARMArchVersion = V5TE; + } else if (SubVer == '4') { + if (Len >= Idx+2 && TT[Idx+1] == 't') + ARMArchVersion = V4T; + else + ARMArchVersion = V4; } } // Thumb2 implies at least V6T2. 
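To make the effect of the rewritten triple parsing above concrete, a few representative cases are listed below. The triples are chosen for illustration and do not come from the patch; the mapping follows the new SubVer logic and the new "generic" fallback.

  armv4-unknown-linux-gnueabi    -> V4    (new baseline enum value)
  armv4t-unknown-linux-gnueabi   -> V4T
  armv5te-unknown-linux-gnueabi  -> V5TE
  armv6-unknown-linux-gnueabi    -> V6
  armv6t2-unknown-linux-gnueabi  -> V6T2  (ThumbMode is raised to Thumb2)
  armv7a-unknown-linux-gnueabi   -> V7A   (ThumbMode is raised to Thumb2)
  CPU "generic" with an empty feature string -> V4T (the new default)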
- if (ARMArchVersion < V6T2 && ThumbMode >= Thumb2) + if (ARMArchVersion >= V6T2) + ThumbMode = Thumb2; + else if (ThumbMode >= Thumb2) ARMArchVersion = V6T2; if (Len >= 10) { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3f06b7b..6980851 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,7 +26,7 @@ class GlobalValue; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4T, V5T, V5TE, V6, V6T2, V7A + V4, V4T, V5T, V5TE, V6, V6T2, V7A }; enum ARMFPEnum { @@ -38,7 +38,7 @@ protected: Thumb2 }; - /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE, + /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE, /// V6, V6T2, V7A. ARMArchEnum ARMArchVersion; diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index 9703403..a488c0a 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H #define LLVM_TARGET_ARM_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCSectionELF.h" namespace llvm { @@ -24,7 +24,7 @@ namespace llvm { if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, SectionKind::getDataRel()); StaticDtorSection = diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk new file mode 100644 index 0000000..ea796af --- /dev/null +++ b/lib/Target/ARM/Android.mk @@ -0,0 +1,49 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + ARMGenRegisterInfo.h.inc \ + ARMGenRegisterNames.inc \ + ARMGenRegisterInfo.inc \ + ARMGenInstrNames.inc \ + ARMGenInstrInfo.inc \ + ARMGenDAGISel.inc \ + ARMGenSubtarget.inc \ + ARMGenCodeEmitter.inc \ + ARMGenCallingConv.inc + +LOCAL_SRC_FILES := \ + ARMBaseInstrInfo.cpp \ + ARMBaseRegisterInfo.cpp \ + ARMCodeEmitter.cpp \ + ARMConstantIslandPass.cpp \ + ARMConstantPoolValue.cpp \ + ARMExpandPseudoInsts.cpp \ + ARMISelDAGToDAG.cpp \ + ARMISelLowering.cpp \ + ARMInstrInfo.cpp \ + ARMJITInfo.cpp \ + ARMLoadStoreOptimizer.cpp \ + ARMMCAsmInfo.cpp \ + ARMRegisterInfo.cpp \ + ARMSubtarget.cpp \ + ARMTargetMachine.cpp \ + NEONMoveFix.cpp \ + NEONPreAllocPass.cpp \ + Thumb1InstrInfo.cpp \ + Thumb1RegisterInfo.cpp \ + Thumb2ITBlockPass.cpp \ + Thumb2InstrInfo.cpp \ + Thumb2RegisterInfo.cpp \ + Thumb2SizeReduction.cpp + +LOCAL_MODULE:= libLLVMARMCodeGen + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 0a75c09..d6d595c 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" @@ -37,7 +38,6 @@ #include "llvm/MC/MCStreamer.h" #include 
"llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" @@ -122,6 +122,7 @@ namespace { void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum); void printPredicateOperand(const MachineInstr *MI, int OpNum); + void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum); void printSBitModifierOperand(const MachineInstr *MI, int OpNum); void printPCLabel(const MachineInstr *MI, int OpNum); void printRegisterList(const MachineInstr *MI, int OpNum); @@ -786,6 +787,12 @@ void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum) { O << ARMCondCodeToString(CC); } +void ARMAsmPrinter::printMandatoryPredicateOperand(const MachineInstr *MI, + int OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + O << ARMCondCodeToString(CC); +} + void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){ unsigned Reg = MI->getOperand(OpNum).getReg(); if (Reg) { diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index d7d8e09..a2084b0 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -325,6 +325,12 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { O << ARMCondCodeToString(CC); } +void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI, + unsigned OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + O << ARMCondCodeToString(CC); +} + void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum){ if (MI->getOperand(OpNum).getReg()) { assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 23a7f05..b7964c9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -71,6 +71,7 @@ public: void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} void printPredicateOperand(const MCInst *MI, unsigned OpNum); + void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); void printRegisterList(const MCInst *MI, unsigned OpNum); void printCPInstOperand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 9efb5a1..57b65cf 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -10,6 +10,8 @@ Reimplement 'select' in terms of 'SEL'. * Implement pre/post increment support. (e.g. PR935) * Implement smarter constant generation for binops with large immediates. +A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX + //===---------------------------------------------------------------------===// Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the diff --git a/lib/Target/ARM/TargetInfo/Android.mk b/lib/Target/ARM/TargetInfo/Android.mk new file mode 100644 index 0000000..c1998a1 --- /dev/null +++ b/lib/Target/ARM/TargetInfo/Android.mk @@ -0,0 +1,24 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + ARMGenRegisterNames.inc \ + ARMGenInstrNames.inc + +TBLGEN_TD_DIR := $(LOCAL_PATH)/.. 
+ +LOCAL_SRC_FILES := \ + ARMTargetInfo.cpp + +LOCAL_C_INCLUDES += \ + $(LOCAL_PATH)/.. + +LOCAL_MODULE:= libLLVMARMInfo + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index d6630ce..163d1e9 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -450,9 +450,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= AFI->getGPRCalleeSavedArea1Offset(); else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (hasFP(MF)) { - assert(SPAdj == 0 && "Unexpected"); - // There is alloca()'s in this function, must reference off the frame + else if (MF.getFrameInfo()->hasVarSizedObjects()) { + assert(SPAdj == 0 && hasFP(MF) && "Unexpected"); + // There are alloca()'s in this function, must reference off the frame // pointer instead. FrameReg = getFrameRegister(MF); Offset -= AFI->getFramePtrSpillOffset(); @@ -778,9 +778,19 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { } static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - return (MI->getOpcode() == ARM::tRestore && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); + if (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) + return true; + else if (MI->getOpcode() == ARM::tPOP) { + // The first three operands are predicates and such. The last two are + // imp-def and imp-use of SP. Check everything in between. + for (int i = 3, e = MI->getNumOperands() - 2; i != e; ++i) + if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + return false; + return true; + } + return false; } void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, @@ -794,13 +804,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); + const unsigned *CSRegs = getCalleeSavedRegs(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; @@ -836,6 +846,9 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, } if (VARegSaveSize) { + // Move back past the callee-saved register restoration + while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) + ++MBBI; // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. 
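The reordering above exists so that, in a varargs function, the pop of the saved return address into R3 lands after the callee-saved restores that isCSRestore now also recognizes in tPOP form. Roughly, the emitted Thumb1 epilogue then reads as sketched below; the callee-saved register list and the immediate are placeholders, not values taken from the patch.

  pop  {r4, r7}      @ restore callee-saved registers (matched by isCSRestore)
  pop  {r3}          @ pop the saved LR into r3
  add  sp, #12       @ free the vararg register save area (VARegSaveSize)
  bx   r3            @ tBX_RET_vararg: return through r3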
@@ -845,6 +858,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); + // erase the old tBX_RET instruction MBB.erase(MBBI); } } diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td index 38ada69..bde8819 100644 --- a/lib/Target/Alpha/AlphaCallingConv.td +++ b/lib/Target/Alpha/AlphaCallingConv.td @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// def RetCC_Alpha : CallingConv<[ // i64 is returned in register R0 - CCIfType<[i64], CCAssignToReg<[R0]>>, + // R1 is an llvm extension, I don't know what gcc does + CCIfType<[i64], CCAssignToReg<[R0,R1]>>, // f32 / f64 are returned in F0/F1 CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>> diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index eaefef9..5303d85 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -64,7 +64,7 @@ namespace { /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are /// in checking mode. If LHS is null, we assume that the mask has already /// been validated before. - uint64_t get_zapImm(SDValue LHS, uint64_t Constant) { + uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { uint64_t BitsToCheck = 0; unsigned Result = 0; for (unsigned i = 0; i != 8; ++i) { @@ -159,10 +159,6 @@ namespace { // target-specific node if it hasn't already been changed. SDNode *Select(SDNode *N); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "Alpha DAG->DAG Pattern Instruction Selection"; } @@ -222,20 +218,11 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() { return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode(); } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void AlphaDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) { + if (N->isMachineOpcode()) return NULL; // Already selected. 
- } DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 0bbe567..5d8310e 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -282,7 +282,8 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(VA.getLocMemOffset())); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), 0)); + PseudoSourceValue::getStack(), 0, + false, false, 0)); } } @@ -426,7 +427,8 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); } @@ -442,14 +444,16 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); if (i == 0) VarArgsBase = FI; SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); + LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, + false, false, 0)); } //Set up a token factor with all the stack traffic @@ -528,11 +532,12 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); DebugLoc dl = N->getDebugLoc(); - SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0); + SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, VAListS, 0, + false, false, 0); SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), - Tmp, NULL, 0, MVT::i32); + Tmp, NULL, 0, MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) { @@ -547,7 +552,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset, DAG.getConstant(8, MVT::i64)); Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, NULL, 0, - MVT::i32); + MVT::i32, false, false, 0); } /// LowerOperation - Provide custom lowering hooks for some operations. 
@@ -694,9 +699,10 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { SDValue Result; if (Op.getValueType() == MVT::i32) Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, - NULL, 0, MVT::i32); + NULL, 0, MVT::i32, false, false, 0); else - Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0); + Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, + false, false, 0); return Result; } case ISD::VACOPY: { @@ -706,15 +712,18 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0); - SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0); + SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, SrcS, 0, + false, false, 0); + SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, DestS, 0, + false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, - NP, NULL,0, MVT::i32); + NP, NULL,0, MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32); + return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, NULL, 0, MVT::i32, + false, false, 0); } case ISD::VASTART: { SDValue Chain = Op.getOperand(0); @@ -723,11 +732,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { // vastart stores the address of the VarArgsBase and VarArgsOffset SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); - SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0); + SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, + false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), - SA2, NULL, 0, MVT::i32); + SA2, NULL, 0, MVT::i32, false, false, 0); } case ISD::RETURNADDR: return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc::getUnknownLoc(), @@ -749,7 +759,8 @@ void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Chain, DataPtr; LowerVAARG(N, Chain, DataPtr, DAG); - SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0); + SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, NULL, 0, + false, false, 0); Results.push_back(Res); Results.push_back(SDValue(Res.getNode(), 1)); } diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 8917e86..341c4a7 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -92,7 +92,7 @@ def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended ((int64_t)N->getZExtValue() << 32) >> 32; }], SExt16>; -def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{ +def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{ ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!RHS) return 0; uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue()); @@ -602,9 +602,9 @@ def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle count def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory 
barrier -def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)), +def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)), (WMB)>; -def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)), +def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), (MB)>; //Basic Floating point ops diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 64bdd62..ba662fb 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -251,7 +251,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } @@ -303,15 +303,14 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, } else { std::string msg; raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " + NumBytes; + Msg << "Too big a stack frame at " << NumBytes; llvm_report_error(Msg.str()); } } } unsigned AlphaRegisterInfo::getRARegister() const { - llvm_unreachable("What is the return address register"); - return 0; + return Alpha::R26; } unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { diff --git a/lib/Target/Android.mk b/lib/Target/Android.mk new file mode 100644 index 0000000..8bf4340 --- /dev/null +++ b/lib/Target/Android.mk @@ -0,0 +1,38 @@ +LOCAL_PATH:= $(call my-dir) + +target_SRC_FILES := \ + Mangler.cpp \ + SubtargetFeature.cpp \ + Target.cpp \ + TargetAsmLexer.cpp \ + TargetData.cpp \ + TargetELFWriterInfo.cpp \ + TargetFrameInfo.cpp \ + TargetInstrInfo.cpp \ + TargetIntrinsicInfo.cpp \ + TargetLoweringObjectFile.cpp \ + TargetMachine.cpp \ + TargetRegisterInfo.cpp \ + TargetSubtarget.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(target_SRC_FILES) + +LOCAL_MODULE:= libLLVMTarget + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(target_SRC_FILES) + +LOCAL_MODULE:= libLLVMTarget + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index 2c9cc60..c8d71aa 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -41,7 +41,7 @@ namespace { BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel) : SelectionDAGISel(TM, OptLevel) {} - virtual void InstructionSelect(); + virtual void PostprocessISelDAG(); virtual const char *getPassName() const { return "Blackfin DAG->DAG Pattern Instruction Selection"; @@ -72,13 +72,7 @@ FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM, return new BlackfinDAGToDAGISel(TM, OptLevel); } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void BlackfinDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. 
- SelectRoot(*CurDAG); - DEBUG(errs() << "Selected selection DAG before regclass fixup:\n"); - DEBUG(CurDAG->dump()); +void BlackfinDAGToDAGISel::PostprocessISelDAG() { FixRegisterClasses(*CurDAG); } diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 269707a..5ce2013 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -206,7 +206,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -329,7 +330,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, PseudoSourceValue::getStack(), - Offset)); + Offset, false, false, 0)); } } diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index fd4c4e7..10f873f 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -49,7 +49,6 @@ #include "llvm/System/Host.h" #include "llvm/Config/config.h" #include <algorithm> -#include <sstream> using namespace llvm; extern "C" void LLVMInitializeCBackendTarget() { @@ -153,26 +152,16 @@ namespace { return false; } - raw_ostream &printType(formatted_raw_ostream &Out, - const Type *Ty, + raw_ostream &printType(raw_ostream &Out, const Type *Ty, bool isSigned = false, const std::string &VariableName = "", bool IgnoreName = false, const AttrListPtr &PAL = AttrListPtr()); - std::ostream &printType(std::ostream &Out, const Type *Ty, - bool isSigned = false, - const std::string &VariableName = "", - bool IgnoreName = false, - const AttrListPtr &PAL = AttrListPtr()); - raw_ostream &printSimpleType(formatted_raw_ostream &Out, - const Type *Ty, - bool isSigned, - const std::string &NameSoFar = ""); - std::ostream &printSimpleType(std::ostream &Out, const Type *Ty, - bool isSigned, + raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty, + bool isSigned, const std::string &NameSoFar = ""); - void printStructReturnPointerFunctionType(formatted_raw_ostream &Out, + void printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, const PointerType *Ty); @@ -385,8 +374,8 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { // If this isn't a struct or array type, remove it from our set of types // to name. This simplifies emission later. - if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second) && - !isa<ArrayType>(I->second)) { + if (!I->second->isStructTy() && !I->second->isOpaqueTy() && + !I->second->isArrayTy()) { TST.remove(I); } else { // If this is not used, remove it from the symbol table. 
@@ -405,7 +394,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { unsigned RenameCounter = 0; for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end(); I != E; ++I) - if (isa<StructType>(*I) || isa<ArrayType>(*I)) { + if ((*I)->isStructTy() || (*I)->isArrayTy()) { while (M.addTypeName("unnamed"+utostr(RenameCounter), *I)) ++RenameCounter; Changed = true; @@ -454,11 +443,12 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) { /// printStructReturnPointerFunctionType - This is like printType for a struct /// return type, except, instead of printing the type as void (*)(Struct*, ...) /// print it as "Struct (*)(...)", for struct return functions. -void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, +void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, const AttrListPtr &PAL, const PointerType *TheTy) { const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType()); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (*) ("; bool PrintedType = false; @@ -470,7 +460,7 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, FunctionInnards << ", "; const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } printType(FunctionInnards, ArgTy, @@ -484,63 +474,14 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out, FunctionInnards << "void"; } FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); printType(Out, RetTy, - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); } raw_ostream & -CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty, - bool isSigned, +CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && - "Invalid type for printSimpleType"); - switch (Ty->getTypeID()) { - case Type::VoidTyID: return Out << "void " << NameSoFar; - case Type::IntegerTyID: { - unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); - if (NumBits == 1) - return Out << "bool " << NameSoFar; - else if (NumBits <= 8) - return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar; - else if (NumBits <= 16) - return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar; - else if (NumBits <= 32) - return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar; - else if (NumBits <= 64) - return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar; - else { - assert(NumBits <= 128 && "Bit widths > 128 not implemented yet"); - return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar; - } - } - case Type::FloatTyID: return Out << "float " << NameSoFar; - case Type::DoubleTyID: return Out << "double " << NameSoFar; - // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is - // present matches host 'long double'. 
- case Type::X86_FP80TyID: - case Type::PPC_FP128TyID: - case Type::FP128TyID: return Out << "long double " << NameSoFar; - - case Type::VectorTyID: { - const VectorType *VTy = cast<VectorType>(Ty); - return printSimpleType(Out, VTy->getElementType(), isSigned, - " __attribute__((vector_size(" + - utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar); - } - - default: -#ifndef NDEBUG - errs() << "Unknown primitive type: " << *Ty << "\n"; -#endif - llvm_unreachable(0); - } -} - -std::ostream & -CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, - const std::string &NameSoFar) { - assert((Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) && + assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) && "Invalid type for printSimpleType"); switch (Ty->getTypeID()) { case Type::VoidTyID: return Out << "void " << NameSoFar; @@ -587,120 +528,16 @@ CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned, // Pass the Type* and the variable name and this prints out the variable // declaration. // -raw_ostream &CWriter::printType(formatted_raw_ostream &Out, - const Type *Ty, +raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, bool isSigned, const std::string &NameSoFar, bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { - printSimpleType(Out, Ty, isSigned, NameSoFar); - return Out; - } - - // Check to see if the type is named. - if (!IgnoreName || isa<OpaqueType>(Ty)) { - std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); - if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; - } - - switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - const FunctionType *FTy = cast<FunctionType>(Ty); - std::stringstream FunctionInnards; - FunctionInnards << " (" << NameSoFar << ") ("; - unsigned Idx = 1; - for (FunctionType::param_iterator I = FTy->param_begin(), - E = FTy->param_end(); I != E; ++I) { - const Type *ArgTy = *I; - if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); - ArgTy = cast<PointerType>(ArgTy)->getElementType(); - } - if (I != FTy->param_begin()) - FunctionInnards << ", "; - printType(FunctionInnards, ArgTy, - /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), ""); - ++Idx; - } - if (FTy->isVarArg()) { - if (FTy->getNumParams()) - FunctionInnards << ", ..."; - } else if (!FTy->getNumParams()) { - FunctionInnards << "void"; - } - FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); - printType(Out, FTy->getReturnType(), - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); - return Out; - } - case Type::StructTyID: { - const StructType *STy = cast<StructType>(Ty); - Out << NameSoFar + " {\n"; - unsigned Idx = 0; - for (StructType::element_iterator I = STy->element_begin(), - E = STy->element_end(); I != E; ++I) { - Out << " "; - printType(Out, *I, false, "field" + utostr(Idx++)); - Out << ";\n"; - } - Out << '}'; - if (STy->isPacked()) - Out << " __attribute__ ((packed))"; - return Out; - } - - case Type::PointerTyID: { - const PointerType *PTy = cast<PointerType>(Ty); - std::string ptrName = "*" + NameSoFar; - - if (isa<ArrayType>(PTy->getElementType()) || - isa<VectorType>(PTy->getElementType())) - ptrName = "(" + ptrName + ")"; - - if (!PAL.isEmpty()) - // Must be a function ptr cast! 
- return printType(Out, PTy->getElementType(), false, ptrName, true, PAL); - return printType(Out, PTy->getElementType(), false, ptrName); - } - - case Type::ArrayTyID: { - const ArrayType *ATy = cast<ArrayType>(Ty); - unsigned NumElements = ATy->getNumElements(); - if (NumElements == 0) NumElements = 1; - // Arrays are wrapped in structs to allow them to have normal - // value semantics (avoiding the array "decay"). - Out << NameSoFar << " { "; - printType(Out, ATy->getElementType(), false, - "array[" + utostr(NumElements) + "]"); - return Out << "; }"; - } - - case Type::OpaqueTyID: { - std::string TyName = "struct opaque_" + itostr(OpaqueCounter++); - assert(TypeNames.find(Ty) == TypeNames.end()); - TypeNames[Ty] = TyName; - return Out << TyName << ' ' << NameSoFar; - } - default: - llvm_unreachable("Unhandled case in getTypeProps!"); - } - - return Out; -} - -// Pass the Type* and the variable name and this prints out the variable -// declaration. -// -std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, - bool isSigned, const std::string &NameSoFar, - bool IgnoreName, const AttrListPtr &PAL) { - if (Ty->isPrimitiveType() || Ty->isInteger() || isa<VectorType>(Ty)) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) { printSimpleType(Out, Ty, isSigned, NameSoFar); return Out; } // Check to see if the type is named. - if (!IgnoreName || isa<OpaqueType>(Ty)) { + if (!IgnoreName || Ty->isOpaqueTy()) { std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty); if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar; } @@ -708,14 +545,15 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, switch (Ty->getTypeID()) { case Type::FunctionTyID: { const FunctionType *FTy = cast<FunctionType>(Ty); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); FunctionInnards << " (" << NameSoFar << ") ("; unsigned Idx = 1; for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I) { const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } if (I != FTy->param_begin()) @@ -731,9 +569,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, FunctionInnards << "void"; } FunctionInnards << ')'; - std::string tstr = FunctionInnards.str(); printType(Out, FTy->getReturnType(), - /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr); + /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str()); return Out; } case Type::StructTyID: { @@ -756,8 +593,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty, const PointerType *PTy = cast<PointerType>(Ty); std::string ptrName = "*" + NameSoFar; - if (isa<ArrayType>(PTy->getElementType()) || - isa<VectorType>(PTy->getElementType())) + if (PTy->getElementType()->isArrayTy() || + PTy->getElementType()->isVectorTy()) ptrName = "(" + ptrName + ")"; if (!PAL.isEmpty()) @@ -1144,7 +981,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << "(("; printType(Out, CPV->getType()); // sign doesn't matter Out << ")/*UNDEF*/"; - if (!isa<VectorType>(CPV->getType())) { + if (!CPV->getType()->isVectorTy()) { Out << "0)"; } else { Out << "{})"; @@ -1396,7 +1233,7 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { } if (NeedsExplicitCast) { Out << "(("; - if (Ty->isInteger() && Ty != 
Type::getInt1Ty(Ty->getContext())) + if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext())) printSimpleType(Out, Ty, TypeIsSigned); else printType(Out, Ty); // not integer, sign doesn't matter @@ -1497,7 +1334,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { // We can't currently support integer types other than 1, 8, 16, 32, 64. // Validate this. const Type *Ty = I.getType(); - if (Ty->isInteger() && (Ty!=Type::getInt1Ty(I.getContext()) && + if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) && Ty!=Type::getInt8Ty(I.getContext()) && Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && @@ -1660,7 +1497,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) { // If the operand was a pointer, convert to a large integer type. const Type* OpTy = Operand->getType(); - if (isa<PointerType>(OpTy)) + if (OpTy->isPointerTy()) OpTy = TD->getIntPtrType(Operand->getContext()); Out << "(("; @@ -2102,10 +1939,10 @@ bool CWriter::doInitialization(Module &M) { // complete. If the value is an aggregate, print out { 0 }, and let // the compiler figure out the rest of the zeros. Out << " = " ; - if (isa<StructType>(I->getInitializer()->getType()) || - isa<VectorType>(I->getInitializer()->getType())) { + if (I->getInitializer()->getType()->isStructTy() || + I->getInitializer()->getType()->isVectorTy()) { Out << "{ 0 }"; - } else if (isa<ArrayType>(I->getInitializer()->getType())) { + } else if (I->getInitializer()->getType()->isArrayTy()) { // As with structs and vectors, but with an extra set of braces // because arrays are wrapped in structs. Out << "{ { 0 } }"; @@ -2274,7 +2111,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { // Out << "/* Structure contents */\n"; for (I = TST.begin(); I != End; ++I) - if (isa<StructType>(I->second) || isa<ArrayType>(I->second)) + if (I->second->isStructTy() || I->second->isArrayTy()) // Only print out used types! printContainedStructs(I->second, StructPrinted); } @@ -2287,14 +2124,15 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) { void CWriter::printContainedStructs(const Type *Ty, std::set<const Type*> &StructPrinted) { // Don't walk through pointers. - if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isInteger()) return; + if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy()) + return; // Print all contained types first. for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I) printContainedStructs(*I, StructPrinted); - if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) { + if (Ty->isStructTy() || Ty->isArrayTy()) { // Check to see if we have already printed this struct. if (StructPrinted.insert(Ty).second) { // Print structure type out. @@ -2327,7 +2165,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { const FunctionType *FT = cast<FunctionType>(F->getFunctionType()); const AttrListPtr &PAL = F->getAttributes(); - std::stringstream FunctionInnards; + std::string tstr; + raw_string_ostream FunctionInnards(tstr); // Print out the name... 
FunctionInnards << GetValueName(F) << '('; @@ -2382,7 +2221,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { if (PrintedArg) FunctionInnards << ", "; const Type *ArgTy = *I; if (PAL.paramHasAttr(Idx, Attribute::ByVal)) { - assert(isa<PointerType>(ArgTy)); + assert(ArgTy->isPointerTy()); ArgTy = cast<PointerType>(ArgTy)->getElementType(); } printType(FunctionInnards, ArgTy, @@ -2423,8 +2262,8 @@ static inline bool isFPIntBitCast(const Instruction &I) { return false; const Type *SrcTy = I.getOperand(0)->getType(); const Type *DstTy = I.getType(); - return (SrcTy->isFloatingPoint() && DstTy->isInteger()) || - (DstTy->isFloatingPoint() && SrcTy->isInteger()); + return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) || + (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy()); } void CWriter::printFunction(Function &F) { @@ -2713,7 +2552,7 @@ void CWriter::visitPHINode(PHINode &I) { void CWriter::visitBinaryOperator(Instruction &I) { // binary instructions, shift instructions, setCond instructions. - assert(!isa<PointerType>(I.getType())); + assert(!I.getType()->isPointerTy()); // We must cast the results of binary operations which might be promoted. bool needsCast = false; @@ -3489,7 +3328,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, // exposed, like a global, avoid emitting (&foo)[0], just emit foo instead. if (isAddressExposed(Ptr)) { writeOperandInternal(Ptr, Static); - } else if (I != E && isa<StructType>(*I)) { + } else if (I != E && (*I)->isStructTy()) { // If we didn't already emit the first operand, see if we can print it as // P->f instead of "P[0].f" writeOperand(Ptr); @@ -3504,13 +3343,13 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I, } for (; I != E; ++I) { - if (isa<StructType>(*I)) { + if ((*I)->isStructTy()) { Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue(); - } else if (isa<ArrayType>(*I)) { + } else if ((*I)->isArrayTy()) { Out << ".array["; writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); Out << ']'; - } else if (!isa<VectorType>(*I)) { + } else if (!(*I)->isVectorTy()) { Out << '['; writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr); Out << ']'; @@ -3668,7 +3507,7 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) { i != e; ++i) { const Type *IndexedTy = ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1); - if (isa<ArrayType>(IndexedTy)) + if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else Out << ".field" << *i; @@ -3689,7 +3528,7 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { i != e; ++i) { const Type *IndexedTy = ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1); - if (isa<ArrayType>(IndexedTy)) + if (IndexedTy->isArrayTy()) Out << ".array[" << *i << "]"; else Out << ".field" << *i; @@ -3705,7 +3544,8 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) { bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(createGCLoweringPass()); diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h index 715bbda..d178e7f 100644 --- a/lib/Target/CBackend/CTargetMachine.h +++ b/lib/Target/CBackend/CTargetMachine.h @@ -27,7 +27,8 @@ struct CTargetMachine : public TargetMachine { virtual bool 
addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 06eb149..47cb579 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -123,8 +123,8 @@ multiclass CompareLogicalGreaterThan64 { defm I64LGT: CompareLogicalGreaterThan64; def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>; -def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), - I64LGTv2i64.Fragment>; +//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), +// I64LGTv2i64.Fragment>; // i64 setult: def : I64SETCCNegCond<setule, I64LGTr64>; @@ -201,8 +201,8 @@ multiclass CompareGreaterThan64 { defm I64GT: CompareLogicalGreaterThan64; def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>; -def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), - I64GTv2i64.Fragment>; +//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), +// I64GTv2i64.Fragment>; // i64 setult: def : I64SETCCNegCond<setle, I64GTr64>; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 80693e1..396a921 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -294,15 +294,19 @@ namespace { ((vecVT == MVT::v2i64) && ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || - (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) - return Select(bvNode); + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) { + HandleSDNode Dummy(SDValue(bvNode, 0)); + if (SDNode *N = Select(bvNode)) + return N; + return Dummy.getValue().getNode(); + } // No, need to emit a constant pool spill: std::vector<Constant*> CV; for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i)); - CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue())); + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); } Constant *CP = ConstantVector::get(CV); @@ -311,10 +315,15 @@ namespace { SDValue CGPoolOffset = SPU::LowerConstantPool(CPIdx, *CurDAG, SPUtli.getSPUTargetMachine()); - return SelectCode(CurDAG->getLoad(vecVT, dl, - CurDAG->getEntryNode(), CGPoolOffset, - PseudoSourceValue::getConstantPool(), 0, - false, Alignment).getNode()); + + HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, + CurDAG->getEntryNode(), CGPoolOffset, + PseudoSourceValue::getConstantPool(),0, + false, false, Alignment)); + CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } /// Select - Convert the specified operand from a target-independent to a @@ -390,10 +399,6 @@ namespace { return false; } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "Cell SPU DAG->DAG Pattern Instruction Selection"; } @@ -411,16 +416,6 @@ namespace { }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 
-void -SPUDAGToDAGISel::InstructionSelect() -{ - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /*! \arg Op The ISD instruction operand \arg N The address to be tested @@ -692,9 +687,8 @@ SPUDAGToDAGISel::Select(SDNode *N) { SDValue Ops[8]; DebugLoc dl = N->getDebugLoc(); - if (N->isMachineOpcode()) { + if (N->isMachineOpcode()) return NULL; // Already selected. - } if (Opc == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); @@ -759,43 +753,67 @@ SPUDAGToDAGISel::Select(SDNode *N) { } SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); - SDNode *PromoteScalar = - SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, - Op0VecVT, Op0).getNode()); - + + HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, + Op0VecVT, Op0)); + + SDValue PromScalar; + if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode())) + PromScalar = SDValue(N, 0); + else + PromScalar = PromoteScalar.getValue(); + SDValue zextShuffle = CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, - SDValue(PromoteScalar, 0), - SDValue(PromoteScalar, 0), + PromScalar, PromScalar, SDValue(shufMaskLoad, 0)); - // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we - // re-use it in the VEC2PREFSLOT selection without needing to explicitly - // call SelectCode (it's already done for us.) - SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, - zextShuffle).getNode()); + HandleSDNode Dummy2(zextShuffle); + if (SDNode *N = SelectCode(Dummy2.getValue().getNode())) + zextShuffle = SDValue(N, 0); + else + zextShuffle = Dummy2.getValue(); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, + zextShuffle)); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + SelectCode(Dummy.getValue().getNode()); + return Dummy.getValue().getNode(); } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { SDNode *CGLoad = emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); - return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, - N->getOperand(0), N->getOperand(1), - SDValue(CGLoad, 0)).getNode()); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + 
SDValue(CGLoad, 0))); + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); } else if (Opc == ISD::TRUNCATE) { SDValue Op0 = N->getOperand(0); if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) @@ -832,17 +850,14 @@ SPUDAGToDAGISel::Select(SDNode *N) { } } } else if (Opc == ISD::SHL) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSHLi64(N, OpVT); - } } else if (Opc == ISD::SRL) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSRLi64(N, OpVT); - } } else if (Opc == ISD::SRA) { - if (OpVT == MVT::i64) { + if (OpVT == MVT::i64) return SelectSRAi64(N, OpVT); - } } else if (Opc == ISD::FNEG && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { DebugLoc dl = N->getDebugLoc(); @@ -1224,13 +1239,15 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, ? shufmask.getNode() : emitBuildVector(shufmask.getNode())); - SDNode *shufNode = - Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, + SDValue shufNode = + CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, SDValue(lhsNode, 0), SDValue(rhsNode, 0), - SDValue(shufMaskNode, 0)).getNode()); - - return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, - SDValue(shufNode, 0)); + SDValue(shufMaskNode, 0)); + HandleSDNode Dummy(shufNode); + SDNode *SN = SelectCode(Dummy.getValue().getNode()); + if (SN == 0) SN = Dummy.getValue().getNode(); + + return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0)); } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(emitBuildVector(i64vec.getNode()), 0)); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index fe0f019..e863ee3 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -25,7 +25,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" @@ -118,8 +118,7 @@ namespace { TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Op.getDebugLoc(), - DAG.GetOrdering(InChain.getNode())); + Callee, Args, DAG, Op.getDebugLoc()); return CallInfo.first; } @@ -669,7 +668,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), 16); + LN->isVolatile(), LN->isNonTemporal(), 16); // Update the chain the_chain = result.getValue(1); @@ -820,7 +819,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Re-emit as a v16i8 vector load alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), - SN->isVolatile(), 16); + SN->isVolatile(), SN->isNonTemporal(), 16); // Update the chain the_chain = alignLoadVec.getValue(1); @@ -861,7 +860,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { result = DAG.getStore(the_chain, dl, result, basePtr, LN->getSrcValue(), LN->getSrcValueOffset(), - LN->isVolatile(), LN->getAlignment()); + LN->isVolatile(), 
LN->isNonTemporal(), + LN->getAlignment()); #if 0 && !defined(NDEBUG) if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { @@ -1086,7 +1086,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // or we're forced to do vararg int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; } @@ -1108,7 +1108,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); - SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, + false, false, 0); Chain = Store.getOperand(0); MemOps.push_back(Store); @@ -1190,7 +1191,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1199,7 +1201,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; @@ -1212,7 +1215,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ArgRegIdx != NumArgRegs) { RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); ArgOffset += StackSlotSize; } break; diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 5ef3c6b..3e17a51 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -34,5 +34,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) ExceptionsType = ExceptionHandling::None; + + // SPU assembly requires ".section" before ".bss" + UsesELFSectionDirectiveForBSS = true; } diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 3dd8ca7..9c5893c 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -221,7 +221,7 @@ namespace { APFloat APF = APFloat(CFP->getValueAPF()); // copy if (CFP->getType() == Type::getFloatTy(CFP->getContext())) APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "ConstantFP::get(getGlobalContext(), "; + Out << "ConstantFP::get(mod->getContext(), "; Out << "APFloat("; #if HAVE_PRINTF_A char Buffer[100]; @@ -344,23 +344,23 @@ namespace { std::string CppWriter::getCppName(const Type* Ty) { // First, handle the primitive types .. 
easy - if (Ty->isPrimitiveType() || Ty->isInteger()) { + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { switch (Ty->getTypeID()) { - case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())"; + case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())"; case Type::IntegerTyID: { unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth(); - return "IntegerType::get(getGlobalContext(), " + utostr(BitWidth) + ")"; + return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")"; } - case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(getGlobalContext())"; - case Type::FloatTyID: return "Type::getFloatTy(getGlobalContext())"; - case Type::DoubleTyID: return "Type::getDoubleTy(getGlobalContext())"; - case Type::LabelTyID: return "Type::getLabelTy(getGlobalContext())"; + case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())"; + case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())"; + case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())"; + case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())"; default: error("Invalid primitive type"); break; } // shouldn't be returned, but make it sensible - return "Type::getVoidTy(getGlobalContext())"; + return "Type::getVoidTy(mod->getContext())"; } // Now, see if we've seen the type before and return that @@ -493,7 +493,7 @@ namespace { bool CppWriter::printTypeInternal(const Type* Ty) { // We don't print definitions for primitive types - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return false; // If we already defined this type, we don't need to define it again. @@ -514,7 +514,7 @@ namespace { TypeMap::const_iterator I = UnresolvedTypes.find(Ty); if (I == UnresolvedTypes.end()) { Out << "PATypeHolder " << typeName; - Out << "_fwd = OpaqueType::get(getGlobalContext());"; + Out << "_fwd = OpaqueType::get(mod->getContext());"; nl(Out); UnresolvedTypes[Ty] = typeName; } @@ -615,7 +615,7 @@ namespace { } case Type::OpaqueTyID: { Out << "OpaqueType* " << typeName; - Out << " = OpaqueType::get(getGlobalContext());"; + Out << " = OpaqueType::get(mod->getContext());"; nl(Out); break; } @@ -686,7 +686,7 @@ namespace { // For primitive types and types already defined, just add a name TypeMap::const_iterator TNI = TypeNames.find(TI->second); - if (TI->second->isInteger() || TI->second->isPrimitiveType() || + if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || TNI != TypeNames.end()) { Out << "mod->addTypeName(\""; printEscapedString(TI->first); @@ -751,7 +751,7 @@ namespace { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { std::string constValue = CI->getValue().toString(10, true); Out << "ConstantInt* " << constName - << " = ConstantInt::get(getGlobalContext(), APInt(" + << " = ConstantInt::get(mod->getContext(), APInt(" << cast<IntegerType>(CI->getType())->getBitWidth() << ", StringRef(\"" << constValue << "\"), 10));"; } else if (isa<ConstantAggregateZero>(CV)) { @@ -769,7 +769,7 @@ namespace { CA->getType()->getElementType() == Type::getInt8Ty(CA->getContext())) { Out << "Constant* " << constName << - " = ConstantArray::get(getGlobalContext(), \""; + " = ConstantArray::get(mod->getContext(), \""; std::string tmp = CA->getAsString(); bool nullTerminate = false; if (tmp[tmp.length()-1] == 0) { @@ -995,7 +995,7 @@ namespace { void CppWriter::printVariableHead(const GlobalVariable *GV) { nl(Out) << "GlobalVariable* " << getCppName(GV); if (is_inline) { - Out << " = mod->getGlobalVariable(getGlobalContext(), "; + 
Out << " = mod->getGlobalVariable(mod->getContext(), "; printEscapedString(GV->getName()); Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; nl(Out) << "if (!" << getCppName(GV) << ") {"; @@ -1094,7 +1094,7 @@ namespace { case Instruction::Ret: { const ReturnInst* ret = cast<ReturnInst>(I); - Out << "ReturnInst::Create(getGlobalContext(), " + Out << "ReturnInst::Create(mod->getContext(), " << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");"; break; } @@ -1171,7 +1171,7 @@ namespace { } case Instruction::Unreachable: { Out << "new UnreachableInst(" - << "getGlobalContext(), " + << "mod->getContext(), " << bbname << ");"; break; } @@ -1673,7 +1673,7 @@ namespace { BI != BE; ++BI) { std::string bbname(getCppName(BI)); Out << "BasicBlock* " << bbname << - " = BasicBlock::Create(getGlobalContext(), \""; + " = BasicBlock::Create(mod->getContext(), \""; if (BI->hasName()) printEscapedString(BI->getName()); Out << "\"," << getCppName(BI->getParent()) << ",0);"; @@ -2009,7 +2009,8 @@ char CppWriter::ID = 0; bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) { + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; PM.add(new CppWriter(o)); return false; diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 1f74f76..b7aae91 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -30,7 +30,8 @@ struct CPPTargetMachine : public TargetMachine { virtual bool addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000..cfb2fc8 --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+ MBlazeAsmPrinter.cpp
+ )
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
\ No newline at end of file diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp new file mode 100644 index 0000000..6fe1026 --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -0,0 +1,302 @@ +//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format MBlaze assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-asm-printer" + +#include "MBlaze.h" +#include "MBlazeSubtarget.h" +#include "MBlazeInstrInfo.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MathExtras.h" +#include <cctype> + +using namespace llvm; + +namespace { + class MBlazeAsmPrinter : public AsmPrinter { + const MBlazeSubtarget *Subtarget; + public: + explicit MBlazeAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, + MCContext &Ctx, MCStreamer &Streamer, + const MCAsmInfo *T ) + : AsmPrinter(O, TM, Ctx, Streamer, T) { + Subtarget = &TM.getSubtarget<MBlazeSubtarget>(); + } + + virtual const char *getPassName() const { + return "MBlaze Assembly Printer"; + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + void printOperand(const MachineInstr *MI, int opNum); + void printUnsignedImm(const MachineInstr *MI, int opNum); + void printFSLImm(const MachineInstr *MI, int opNum); + void printMemOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printFCCOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printSavedRegsBitmask(); + void printHex32(unsigned int Value); + + const char *emitCurrentABIString(); + void emitFrameDirective(); + + void printInstruction(const MachineInstr *MI); // autogenerated. 
+ void EmitInstruction(const MachineInstr *MI) { + printInstruction(MI); + O << '\n'; + } + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + static const char *getRegisterName(unsigned RegNo); + + virtual void EmitFunctionEntryLabel(); + void EmitStartOfAsmFile(Module &M); + }; +} // end of anonymous namespace + +#include "MBlazeGenAsmWriter.inc" + +//===----------------------------------------------------------------------===// +// +// MBlaze Asm Directives +// +// -- Frame directive "frame Stackpointer, Stacksize, RARegister" +// Describe the stack frame. +// +// -- Mask directives "mask bitmask, offset" +// Tells the assembler which registers are saved and where. +// bitmask - contain a little endian bitset indicating which registers are +// saved on function prologue (e.g. with a 0x80000000 mask, the +// assembler knows the register 31 (RA) is saved at prologue. +// offset - the position before stack pointer subtraction indicating where +// the first saved register on prologue is located. (e.g. with a +// +// Consider the following function prologue: +// +// .frame R19,48,R15 +// .mask 0xc0000000,-8 +// addiu R1, R1, -48 +// sw R15, 40(R1) +// sw R19, 36(R1) +// +// With a 0xc0000000 mask, the assembler knows the register 15 (R15) and +// 19 (R19) are saved at prologue. As the save order on prologue is from +// left to right, R15 is saved first. A -8 offset means that after the +// stack pointer subtration, the first register in the mask (R15) will be +// saved at address 48-8=40. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Mask directives +//===----------------------------------------------------------------------===// + +// Create a bitmask with all callee saved registers for CPU or Floating Point +// registers. For CPU registers consider RA, GP and FP for saving if necessary. +void MBlazeAsmPrinter::printSavedRegsBitmask() { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + const MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>(); + + // CPU Saved Registers Bitmasks + unsigned int CPUBitmask = 0; + + // Set the CPU Bitmasks + const MachineFrameInfo *MFI = MF->getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg()); + if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass) + CPUBitmask |= (1 << RegNum); + } + + // Return Address and Frame registers must also be set in CPUBitmask. + if (RI.hasFP(*MF)) + CPUBitmask |= (1 << MBlazeRegisterInfo:: + getRegisterNumbering(RI.getFrameRegister(*MF))); + + if (MFI->hasCalls()) + CPUBitmask |= (1 << MBlazeRegisterInfo:: + getRegisterNumbering(RI.getRARegister())); + + // Print CPUBitmask + O << "\t.mask \t"; printHex32(CPUBitmask); O << ',' + << MBlazeFI->getCPUTopSavedRegOff() << '\n'; +} + +// Print a 32 bit hex number with all numbers. 
+void MBlazeAsmPrinter::printHex32(unsigned int Value) { + O << "0x"; + for (int i = 7; i >= 0; i--) + O << utohexstr( (Value & (0xF << (i*4))) >> (i*4) ); +} + +//===----------------------------------------------------------------------===// +// Frame and Set directives +//===----------------------------------------------------------------------===// + +/// Frame Directive +void MBlazeAsmPrinter::emitFrameDirective() { + const TargetRegisterInfo &RI = *TM.getRegisterInfo(); + + unsigned stackReg = RI.getFrameRegister(*MF); + unsigned returnReg = RI.getRARegister(); + unsigned stackSize = MF->getFrameInfo()->getStackSize(); + + + O << "\t.frame\t" << getRegisterName(stackReg) + << ',' << stackSize << ',' + << getRegisterName(returnReg) + << '\n'; +} + +void MBlazeAsmPrinter::EmitFunctionEntryLabel() { + O << "\t.ent\t" << *CurrentFnSym << '\n'; + OutStreamer.EmitLabel(CurrentFnSym); +} + +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void MBlazeAsmPrinter::EmitFunctionBodyStart() { + emitFrameDirective(); + printSavedRegsBitmask(); +} + +/// EmitFunctionBodyEnd - Targets can override this to emit stuff after +/// the last basic block in the function. +void MBlazeAsmPrinter::EmitFunctionBodyEnd() { + O << "\t.end\t" << *CurrentFnSym << '\n'; +} + +// Print out an operand for an inline asm expression. +bool MBlazeAsmPrinter:: +PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant,const char *ExtraCode){ + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + printOperand(MI, OpNo); + return false; +} + +void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + O << getRegisterName(MO.getReg()); + break; + + case MachineOperand::MO_Immediate: + O << (int)MO.getImm(); + break; + + case MachineOperand::MO_FPImmediate: { + const ConstantFP* fp = MO.getFPImm(); + printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue()); + O << ";\t# immediate = " << *fp; + break; + } + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(OutContext); + return; + + case MachineOperand::MO_GlobalAddress: + O << *GetGlobalValueSymbol(MO.getGlobal()); + break; + + case MachineOperand::MO_ExternalSymbol: + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + break; + + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" + << getFunctionNumber() << "_" << MO.getIndex(); + if (MO.getOffset()) + O << "+" << MO.getOffset(); + break; + + default: + llvm_unreachable("<unknown operand type>"); + } +} + +void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_Immediate) + O << (unsigned int)MO.getImm(); + else + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_Immediate) + O << "rfsl" << (unsigned int)MO.getImm(); + else + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter:: +printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) { + 
printOperand(MI, opNum+1); + O << ", "; + printOperand(MI, opNum); +} + +void MBlazeAsmPrinter:: +printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) { + const MachineOperand& MO = MI->getOperand(opNum); + O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm()); +} + +void MBlazeAsmPrinter::EmitStartOfAsmFile(Module &M) { +} + +// Force static initialization. +extern "C" void LLVMInitializeMBlazeAsmPrinter() { + RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget); +} diff --git a/lib/Target/MBlaze/AsmPrinter/Makefile b/lib/Target/MBlaze/AsmPrinter/Makefile new file mode 100644 index 0000000..c8e4d8f --- /dev/null +++ b/lib/Target/MBlaze/AsmPrinter/Makefile @@ -0,0 +1,17 @@ +##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMMBlazeAsmPrinter + +# Hack: we need to include 'main' MBlaze target directory to grab +# private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt new file mode 100644 index 0000000..c93e3df --- /dev/null +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVM_TARGET_DEFINITIONS MBlaze.td) + +tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(MBlazeGenRegisterNames.inc -gen-register-enums) +tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc) +tablegen(MBlazeGenInstrNames.inc -gen-instr-enums) +tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc) +tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer) +tablegen(MBlazeGenDAGISel.inc -gen-dag-isel) +tablegen(MBlazeGenCallingConv.inc -gen-callingconv) +tablegen(MBlazeGenSubtarget.inc -gen-subtarget) +tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) + +add_llvm_target(MBlazeCodeGen + MBlazeDelaySlotFiller.cpp + MBlazeInstrInfo.cpp + MBlazeISelDAGToDAG.cpp + MBlazeISelLowering.cpp + MBlazeMCAsmInfo.cpp + MBlazeRegisterInfo.cpp + MBlazeSubtarget.cpp + MBlazeTargetMachine.cpp + MBlazeTargetObjectFile.cpp + MBlazeIntrinsicInfo.cpp + ) + +target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG) diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h new file mode 100644 index 0000000..f9d828b --- /dev/null +++ b/lib/Target/MBlaze/MBlaze.h @@ -0,0 +1,39 @@ +//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM MBlaze back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_MBLAZE_H +#define TARGET_MBLAZE_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class MBlazeTargetMachine; + class FunctionPass; + class MachineCodeEmitter; + class formatted_raw_ostream; + + FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM); + FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM); + + extern Target TheMBlazeTarget; +} // end namespace llvm; + +// Defines symbolic names for MBlaze registers. This defines a mapping from +// register name to register number. +#include "MBlazeGenRegisterNames.inc" + +// Defines symbolic names for the MBlaze instructions. +#include "MBlazeGenInstrNames.inc" + +#endif diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td new file mode 100644 index 0000000..1679752 --- /dev/null +++ b/lib/Target/MBlaze/MBlaze.td @@ -0,0 +1,85 @@ +//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the MBlaze target. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Register File, Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "MBlazeRegisterInfo.td" +include "MBlazeSchedule.td" +include "MBlazeIntrinsics.td" +include "MBlazeInstrInfo.td" +include "MBlazeCallingConv.td" + +def MBlazeInstrInfo : InstrInfo { + let TSFlagsFields = []; + let TSFlagsShifts = []; +} + + +//===----------------------------------------------------------------------===// +// Microblaze Subtarget features // +//===----------------------------------------------------------------------===// + +def FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true", + "Implements 3-stage pipeline.">; +def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true", + "Implements barrel shifter.">; +def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true", + "Implements hardware divider.">; +def FeatureMul : SubtargetFeature<"mul", "HasMul", "true", + "Implements hardware multiplier.">; +def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true", + "Implements FSL instructions.">; +def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true", + "Implements extended FSL instructions.">; +def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true", + "Implements MSR register set and clear.">; +def FeatureException : SubtargetFeature<"exception", "HasException", "true", + "Implements hardware exception support.">; +def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true", + "Implements pattern compare instruction.">; +def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true", + "Implements floating point unit.">; +def FeatureESR : SubtargetFeature<"esr", "HasESR", "true", + "Implements ESR and EAR registers">; +def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true", + 
"Implements processor version register.">; +def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true", + "Implements multiplier with 64-bit result">; +def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true", + "Implements sqrt and floating point convert.">; +def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true", + "Implements memory management unit.">; + +//===----------------------------------------------------------------------===// +// MBlaze processors supported. +//===----------------------------------------------------------------------===// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, MBlazeGenericItineraries, Features>; + + +def : Proc<"v400", []>; +def : Proc<"v500", []>; +def : Proc<"v600", []>; +def : Proc<"v700", []>; +def : Proc<"v710", []>; + +def MBlaze : Target { + let InstructionSet = MBlazeInstrInfo; +} diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td new file mode 100644 index 0000000..ddd4998 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeCallingConv.td @@ -0,0 +1,26 @@ +//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for MBlaze architecture. +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A>: + CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>; + +//===----------------------------------------------------------------------===// +// MBlaze ABI Calling Convention +//===----------------------------------------------------------------------===// + +def RetCC_MBlaze : CallingConv<[ + // i32 are returned in registers R3, R4 + CCIfType<[i32], CCAssignToReg<[R3, R4]>>, + + // f32 are returned in registers F3, F4 + CCIfType<[f32], CCAssignToReg<[F3, F4]>> +]>; diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp new file mode 100644 index 0000000..42fea25 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -0,0 +1,75 @@ +//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Simple pass to fills delay slots with NOPs. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "delay-slot-filler" + +#include "MBlaze.h" +#include "MBlazeTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +STATISTIC(FilledSlots, "Number of delay slots filled"); + +namespace { + struct Filler : public MachineFunctionPass { + + TargetMachine &TM; + const TargetInstrInfo *TII; + + static char ID; + Filler(TargetMachine &tm) + : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { } + + virtual const char *getPassName() const { + return "MBlaze Delay Slot Filler"; + } + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + bool runOnMachineFunction(MachineFunction &F) { + bool Changed = false; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) + Changed |= runOnMachineBasicBlock(*FI); + return Changed; + } + + }; + char Filler::ID = 0; +} // end of anonymous namespace + +/// runOnMachineBasicBlock - Fill in delay slots for the given basic block. +/// Currently, we fill delay slots with NOPs. We assume there is only one +/// delay slot per delayed instruction. +bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) + if (I->getDesc().hasDelaySlot()) { + MachineBasicBlock::iterator J = I; + ++J; + BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP)); + ++FilledSlots; + Changed = true; + } + return Changed; +} + +/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay +/// slots in MBlaze MachineFunctions +FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) { + return new Filler(tm); +} + diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp new file mode 100644 index 0000000..7e59c4a --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -0,0 +1,339 @@ +//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the MBlaze target. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-isel" +#include "MBlaze.h" +#include "MBlazeISelLowering.h" +#include "MBlazeMachineFunction.h" +#include "MBlazeRegisterInfo.h" +#include "MBlazeSubtarget.h" +#include "MBlazeTargetMachine.h" +#include "llvm/GlobalValue.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Support/CFG.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace { + +class MBlazeDAGToDAGISel : public SelectionDAGISel { + + /// TM - Keep a reference to MBlazeTargetMachine. + MBlazeTargetMachine &TM; + + /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can + /// make the right decision when generating code for different targets. + const MBlazeSubtarget &Subtarget; + +public: + explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) : + SelectionDAGISel(tm), + TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MBlaze DAG->DAG Pattern Instruction Selection"; + } +private: + // Include the pieces autogenerated from the target description. + #include "MBlazeGenDAGISel.inc" + + /// getTargetMachine - Return a reference to the TargetMachine, casted + /// to the target-specific type. + const MBlazeTargetMachine &getTargetMachine() { + return static_cast<const MBlazeTargetMachine &>(TM); + } + + /// getInstrInfo - Return a reference to the TargetInstrInfo, casted + /// to the target-specific type. + const MBlazeInstrInfo *getInstrInfo() { + return getTargetMachine().getInstrInfo(); + } + + SDNode *getGlobalBaseReg(); + SDNode *Select(SDNode *N); + + // Complex Pattern. + bool SelectAddr(SDNode *Op, SDValue N, + SDValue &Base, SDValue &Offset); + + // Address Selection + bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index); + bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base); + + // getI32Imm - Return a target constant with the specified value, of type i32. + inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } +}; + +} + +/// isIntS32Immediate - This method tests to see if the node is either a 32-bit +/// or 64-bit immediate, and if the value can be accurately represented as a +/// sign extension from a 32-bit value. If so, this returns true and the +/// immediate. 
+static bool isIntS32Immediate(SDNode *N, int32_t &Imm) { + unsigned Opc = N->getOpcode(); + if (Opc != ISD::Constant) + return false; + + Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + if (N->getValueType(0) == MVT::i32) + return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); + else + return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); +} + +static bool isIntS32Immediate(SDValue Op, int32_t &Imm) { + return isIntS32Immediate(Op.getNode(), Imm); +} + + +/// SelectAddressRegReg - Given the specified addressed, check to see if it +/// can be represented as an indexed [r+r] operation. Returns false if it +/// can be more efficiently represented with [r+imm]. +bool MBlazeDAGToDAGISel:: +SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) { + if (N.getOpcode() == ISD::FrameIndex) return false; + if (N.getOpcode() == ISD::TargetExternalSymbol || + N.getOpcode() == ISD::TargetGlobalAddress) + return false; // direct calls. + + int32_t imm = 0; + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { + if (isIntS32Immediate(N.getOperand(1), imm)) + return false; // r+i + + if (N.getOperand(0).getOpcode() == ISD::TargetJumpTable || + N.getOperand(1).getOpcode() == ISD::TargetJumpTable) + return false; // jump tables. + + Base = N.getOperand(1); + Index = N.getOperand(0); + return true; + } + + return false; +} + +/// Returns true if the address N can be represented by a base register plus +/// a signed 32-bit displacement [r+imm], and if it is not better +/// represented as reg+reg. +bool MBlazeDAGToDAGISel:: +SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) { + // If this can be more profitably realized as r+r, fail. + if (SelectAddrRegReg(Op, N, Disp, Base)) + return false; + + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { + int32_t imm = 0; + if (isIntS32Immediate(N.getOperand(1), imm)) { + Disp = CurDAG->getTargetConstant(imm, MVT::i32); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType()); + } else { + Base = N.getOperand(0); + } + DEBUG( errs() << "WESLEY: Using Operand Immediate\n" ); + return true; // [r+i] + } + } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { + // Loading from a constant address. + uint32_t Imm = CN->getZExtValue(); + Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0)); + Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0)); + DEBUG( errs() << "WESLEY: Using Constant Node\n" ); + return true; + } + + Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy()); + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) + Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType()); + else + Base = N; + return true; // [r+0] +} + +/// getGlobalBaseReg - Output the instructions required to put the +/// GOT address into a register. +SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() { + unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); + return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); +} + +/// ComplexPattern used on MBlazeInstrInfo +/// Used on MBlaze Load/Store instructions +bool MBlazeDAGToDAGISel:: +SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) { + // if Address is FI, get the TargetFrameIndex. 
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // on PIC code Load GA + if (TM.getRelocationModel() == Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || + (Addr.getOpcode() == ISD::TargetConstantPool) || + (Addr.getOpcode() == ISD::TargetJumpTable)){ + Base = CurDAG->getRegister(MBlaze::R15, MVT::i32); + Offset = Addr; + return true; + } + } else { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { + if (Predicate_immSExt16(CN)) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); + } else { + Base = Addr.getOperand(0); + } + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + return true; + } + } + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc dl = Node->getDebugLoc(); + + // Dump information about the Node being selected + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); + + // If we have a custom node, we already have selected! + if (Node->isMachineOpcode()) { + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + return NULL; + } + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + switch(Opcode) { + default: break; + + // Get target GOT address. + case ISD::GLOBAL_OFFSET_TABLE: + return getGlobalBaseReg(); + + case ISD::FrameIndex: { + SDValue imm = CurDAG->getTargetConstant(0, MVT::i32); + int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex(); + EVT VT = Node->getValueType(0); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); + unsigned Opc = MBlaze::ADDI; + if (Node->hasOneUse()) + return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm); + return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm); + } + + + /// Handle direct and indirect calls when using PIC. On PIC, when + /// GOT is smaller than about 64k (small code) the GA target is + /// loaded with only one instruction. Otherwise GA's target must + /// be loaded with 3 instructions. 
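+  /// In the PIC path below the callee address is loaded from the GOT
+  /// (addressed through R15) with an LW, copied into R20, and then a
+  /// BRLID (branch-and-link) through R20 is emitted; indirect calls copy
+  /// the callee value into R20 directly.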
+ case MBlazeISD::JmpLink: { + if (TM.getRelocationModel() == Reloc::PIC_) { + SDValue Chain = Node->getOperand(0); + SDValue Callee = Node->getOperand(1); + SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32); + SDValue InFlag(0, 0); + + if ( (isa<GlobalAddressSDNode>(Callee)) || + (isa<ExternalSymbolSDNode>(Callee)) ) + { + /// Direct call for global addresses and external symbols + SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32); + + // Use load to get GOT target + SDValue Ops[] = { Callee, GPReg, Chain }; + SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl, + MVT::i32, MVT::Other, Ops, 3), 0); + Chain = Load.getValue(1); + + // Call target must be on T9 + Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag); + } else + /// Indirect call + Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag); + + // Emit Jump and Link Register + SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other, + MVT::Flag, R20Reg, Chain); + Chain = SDValue(ResNode, 0); + InFlag = SDValue(ResNode, 1); + ReplaceUses(SDValue(Node, 0), Chain); + ReplaceUses(SDValue(Node, 1), InFlag); + return ResNode; + } + } + } + + // Select the default instruction + SDNode *ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "); + if (ResNode == NULL || ResNode == Node) + DEBUG(Node->dump(CurDAG)); + else + DEBUG(ResNode->dump(CurDAG)); + DEBUG(errs() << "\n"); + return ResNode; +} + +/// createMBlazeISelDag - This pass converts a legalized DAG into a +/// MBlaze-specific DAG, ready for instruction scheduling. +FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) { + return new MBlazeDAGToDAGISel(TM); +} diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp new file mode 100644 index 0000000..f0864d0 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -0,0 +1,975 @@ +//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that MBlaze uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-lower" +#include "MBlazeISelLowering.h" +#include "MBlazeMachineFunction.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeTargetObjectFile.h" +#include "MBlazeSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink"; + case MBlazeISD::GPRel : return "MBlazeISD::GPRel"; + case MBlazeISD::Wrap : return "MBlazeISD::Wrap"; + case MBlazeISD::ICmp : return "MBlazeISD::ICmp"; + case MBlazeISD::Ret : return "MBlazeISD::Ret"; + case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC"; + default : return NULL; + } +} + +MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM) + : TargetLowering(TM, new MBlazeTargetObjectFile()) { + Subtarget = &TM.getSubtarget<MBlazeSubtarget>(); + + // MBlaze does not have i1 type, so use i32 for + // setcc operations results (slt, sgt, ...). + setBooleanContents(ZeroOrOneBooleanContent); + + // Set up the register classes + addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass); + if (Subtarget->hasFPU()) { + addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + } + + // Floating point operations which are not supported + setOperationAction(ISD::FREM, MVT::f32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FPOWI, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FLOG, MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP, MVT::f32, Expand); + + // Load extented operations for i1 types must be promoted + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + // MBlaze has no REM or DIVREM operations. 
+ setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + // If the processor doesn't support multiply then expand it + if (!Subtarget->hasMul()) { + setOperationAction(ISD::MUL, MVT::i32, Expand); + } + + // If the processor doesn't support 64-bit multiply then expand + if (!Subtarget->hasMul() || !Subtarget->hasMul64()) { + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + } + + // If the processor doesn't support division then expand + if (!Subtarget->hasDiv()) { + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + } + + // Expand unsupported conversions + setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); + setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); + + // Expand SELECT_CC + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + + // MBlaze doesn't have MUL_LOHI + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + + // Used by legalize types to correctly generate the setcc result. + // Without this, every float setcc comes with a AND/OR with the result, + // we don't want this, since the fpcmp result goes to a flag register, + // which is used implicitly by brcond and select operations. + AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); + AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32); + AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32); + + // MBlaze Custom Operations + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + + // Variable Argument support + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + + + // Operations not directly supported by MBlaze. + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + + // We don't have line number support yet. 
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + + // Use the default for now + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + // MBlaze doesn't have extending float->double load/store + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + setStackPointerRegisterToSaveRestore(MBlaze::R1); + computeRegisterProperties(); +} + +MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const { + return MVT::i32; +} + +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const { + return 2; +} + +SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) + { + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + } + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Lower helper functions +//===----------------------------------------------------------------------===// +MachineBasicBlock* MBlazeTargetLowering:: +EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, + MachineBasicBlock*> *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + + switch (MI->getOpcode()) { + default: assert(false && "Unexpected instr type to insert"); + case MBlaze::ShiftRL: + case MBlaze::ShiftRA: + case MBlaze::ShiftL: { + // To "insert" a shift left instruction, we actually have to insert a + // simple loop. The incoming instruction knows the destination vreg to + // set, the source vreg to operate over and the shift amount. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // start: + // andi samt, samt, 31 + // beqid samt, finish + // add dst, src, r0 + // loop: + // addik samt, samt, -1 + // sra dst, dst + // bneid samt, loop + // nop + // finish: + MachineFunction *F = BB->getParent(); + MachineRegisterInfo &R = F->getRegInfo(); + MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB); + + unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT) + .addReg(MI->getOperand(2).getReg()) + .addImm(31); + + unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL) + .addReg(MI->getOperand(1).getReg()) + .addImm(0); + + BuildMI(BB, dl, TII->get(MBlaze::BEQID)) + .addReg(IAMT) + .addMBB(finish); + + F->insert(It, loop); + F->insert(It, finish); + + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. 
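+    // After the rewiring below, 'finish' inherits all of BB's original
+    // successors, BB itself only reaches 'loop' and 'finish', and 'loop'
+    // branches back to itself until the shift amount reaches zero.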
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) { + EM->insert(std::make_pair(*i, finish)); + finish->addSuccessor(*i); + } + + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(loop); + BB->addSuccessor(finish); + + // Next, add the finish block as a successor of the loop block + loop->addSuccessor(finish); + loop->addSuccessor(loop); + + unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(loop, dl, TII->get(MBlaze::PHI), DST) + .addReg(IVAL).addMBB(BB) + .addReg(NDST).addMBB(loop); + + unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); + BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT) + .addReg(IAMT).addMBB(BB) + .addReg(NAMT).addMBB(loop); + + if (MI->getOpcode() == MBlaze::ShiftL) + BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST); + else if (MI->getOpcode() == MBlaze::ShiftRA) + BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST); + else if (MI->getOpcode() == MBlaze::ShiftRL) + BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST); + else + llvm_unreachable( "Cannot lower unknown shift instruction" ); + + BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT) + .addReg(SAMT) + .addImm(-1); + + BuildMI(loop, dl, TII->get(MBlaze::BNEID)) + .addReg(NAMT) + .addMBB(loop); + + BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + .addReg(IVAL).addMBB(BB) + .addReg(NDST).addMBB(loop); + + // The pseudo instruction is no longer needed so remove it + F->DeleteMachineInstr(MI); + return finish; + } + + case MBlaze::Select_FCC: + case MBlaze::Select_CC: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineFunction *F = BB->getParent(); + MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB); + + unsigned Opc; + switch (MI->getOperand(4).getImm()) { + default: llvm_unreachable( "Unknown branch condition" ); + case MBlazeCC::EQ: Opc = MBlaze::BNEID; break; + case MBlazeCC::NE: Opc = MBlaze::BEQID; break; + case MBlazeCC::GT: Opc = MBlaze::BLEID; break; + case MBlazeCC::LT: Opc = MBlaze::BGEID; break; + case MBlazeCC::GE: Opc = MBlaze::BLTID; break; + case MBlazeCC::LE: Opc = MBlaze::BGTID; break; + } + + BuildMI(BB, dl, TII->get(Opc)) + .addReg(MI->getOperand(3).getReg()) + .addMBB(dneBB); + + F->insert(It, flsBB); + F->insert(It, dneBB); + + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + // Also inform sdisel of the edge changes. 
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) { + EM->insert(std::make_pair(*i, dneBB)); + dneBB->addSuccessor(*i); + } + + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(flsBB); + BB->addSuccessor(dneBB); + flsBB->addSuccessor(dneBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + //BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB) + // .addReg(MI->getOperand(2).getReg()).addMBB(BB); + + BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(2).getReg()).addMBB(flsBB) + .addReg(MI->getOperand(1).getReg()).addMBB(BB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return dneBB; + } + } +} + +//===----------------------------------------------------------------------===// +// Misc Lower Operation implementation +//===----------------------------------------------------------------------===// +// + +SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc; + + SDValue CompareFlag; + if (LHS.getValueType() == MVT::i32) { + Opc = MBlazeISD::Select_CC; + CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS) + .getValue(1); + } else { + llvm_unreachable( "Cannot lower select_cc with unknown type" ); + } + + return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal, + CompareFlag); +} + +SDValue MBlazeTargetLowering:: +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA); +} + +SDValue MBlazeTargetLowering:: +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { + llvm_unreachable("TLS not implemented for MicroBlaze."); + return SDValue(); // Not reached +} + +SDValue MBlazeTargetLowering:: +LowerJumpTable(SDValue Op, SelectionDAG &DAG) { + SDValue ResNode; + SDValue HiPart; + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + unsigned char OpFlag = IsPIC ? 
MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO; + + EVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI); + //return JTI; +} + +SDValue MBlazeTargetLowering:: +LowerConstantPool(SDValue Op, SelectionDAG &DAG) { + SDValue ResNode; + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); + Constant *C = N->getConstVal(); + SDValue Zero = DAG.getConstant(0, PtrVT); + DebugLoc dl = Op.getDebugLoc(); + + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MBlazeII::MO_ABS_HILO); + return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP); +} + +SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + false, false, 0); +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "MBlazeGenCallingConv.inc" + +static bool CC_MBlaze2(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + static const unsigned RegsSize=6; + static const unsigned IntRegs[] = { + MBlaze::R5, MBlaze::R6, MBlaze::R7, + MBlaze::R8, MBlaze::R9, MBlaze::R10 + }; + + static const unsigned FltRegs[] = { + MBlaze::F5, MBlaze::F6, MBlaze::F7, + MBlaze::F8, MBlaze::F9, MBlaze::F10 + }; + + unsigned Reg=0; + + // Promote i8 and i16 + if (LocVT == MVT::i8 || LocVT == MVT::i16) { + LocVT = MVT::i32; + if (ArgFlags.isSExt()) + LocInfo = CCValAssign::SExt; + else if (ArgFlags.isZExt()) + LocInfo = CCValAssign::ZExt; + else + LocInfo = CCValAssign::AExt; + } + + if (ValVT == MVT::i32) { + Reg = State.AllocateReg(IntRegs, RegsSize); + LocVT = MVT::i32; + } else if (ValVT == MVT::f32) { + Reg = State.AllocateReg(FltRegs, RegsSize); + LocVT = MVT::f32; + } + + if (!Reg) { + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + } else { + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + State.AllocateStack(SizeInBytes, SizeInBytes); + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + } + + return false; // CC must always match +} + +//===----------------------------------------------------------------------===// +// Call Calling Convention Implementation +//===----------------------------------------------------------------------===// + +/// LowerCall - functions arguments are copied from virtual regs to +/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. +/// TODO: isVarArg, isTailCall. 
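+/// The lowering emits CALLSEQ_START, copies register arguments into their
+/// assigned physical registers (glued together with CopyToReg), stores the
+/// remaining arguments to fixed stack objects, emits the MBlazeISD::JmpLink
+/// call node followed by CALLSEQ_END, and finally copies the results back
+/// out of the return registers in LowerCallResult.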
+SDValue MBlazeTargetLowering::
+LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+          bool isVarArg, bool &isTailCall,
+          const SmallVectorImpl<ISD::OutputArg> &Outs,
+          const SmallVectorImpl<ISD::InputArg> &Ins,
+          DebugLoc dl, SelectionDAG &DAG,
+          SmallVectorImpl<SDValue> &InVals) {
+  // MBlaze does not yet support tail call optimization
+  isTailCall = false;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+                 *DAG.getContext());
+  CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze2);
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  unsigned NumBytes = CCInfo.getNextStackOffset();
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  // First/LastArgStackLoc contains the first/last
+  // "at stack" argument location.
+  int LastArgStackLoc = 0;
+  unsigned FirstStackArgLoc = 0;
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    EVT RegVT = VA.getLocVT();
+    SDValue Arg = Outs[i].Val;
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default: llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full: break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+      break;
+    }
+
+    // Arguments that can be passed in a register are collected in the
+    // RegsToPass vector.
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      // Register can't get to this point...
+      assert(VA.isMemLoc());
+
+      // Create the frame index object for this outgoing parameter.
+      LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
+      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+                                      LastArgStackLoc, true, false);
+
+      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+
+      // Emit an ISD::STORE which stores the
+      // parameter value to a stack location.
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+                                         false, false, 0));
+    }
+  }
+
+  // Transform all store nodes into one single node because all store
+  // nodes are independent of each other.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+
+  // Build a sequence of copy-to-reg nodes chained together with token
+  // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+ unsigned char OpFlag = MBlazeII::MO_NO_FLAG; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), + getPointerTy(), 0, OpFlag); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), + getPointerTy(), OpFlag); + + // MBlazeJmpLink = #chain, #target_address, #opt_in_flags... + // = Chain, Callee, Reg#1, Reg#2, ... + // + // Returns a chain & a flag for retval copy to use. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + } + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + // Create the CALLSEQ_END node. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + if (!Ins.empty()) + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, isVarArg, + Ins, dl, DAG, InVals); +} + +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +SDValue MBlazeTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, + bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + + CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), + RVLocs[i].getValVT(), InFlag).getValue(1); + InFlag = Chain.getValue(2); + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + +//===----------------------------------------------------------------------===// +// Formal Arguments Calling Convention Implementation +//===----------------------------------------------------------------------===// + +/// LowerFormalArguments - transform physical registers into +/// virtual registers and generate load operations for +/// arguments places on the stack. +SDValue MBlazeTargetLowering:: +LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + + unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); + VarArgsFrameIndex = 0; + + // Used with vargs to acumulate store chains. + std::vector<SDValue> OutChains; + + // Keep track of the last register used for arguments + unsigned ArgRegEnd = 0; + + // Assign locations to all of the incoming arguments. 
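+  // CC_MBlaze2 assigns the first i32 arguments to R5-R10 (f32 arguments to
+  // F5-F10) and everything else to stack slots.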
+ SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze2); + SDValue StackPtr; + + unsigned FirstStackArgLoc = 0; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored on registers + if (VA.isRegLoc()) { + EVT RegVT = VA.getLocVT(); + ArgRegEnd = VA.getLocReg(); + TargetRegisterClass *RC = 0; + + if (RegVT == MVT::i32) + RC = MBlaze::CPURegsRegisterClass; + else if (RegVT == MVT::f32) + RC = MBlaze::FGR32RegisterClass; + else + llvm_unreachable("RegVT not supported by LowerFormalArguments"); + + // Transform the arguments stored on + // physical registers into virtual ones + unsigned Reg = MF.addLiveIn(ArgRegEnd, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + + // If this is an 8 or 16-bit value, it has been passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. If if is a floating point value + // then convert to the correct type. + if (VA.getLocInfo() != CCValAssign::Full) { + unsigned Opcode = 0; + if (VA.getLocInfo() == CCValAssign::SExt) + Opcode = ISD::AssertSext; + else if (VA.getLocInfo() == CCValAssign::ZExt) + Opcode = ISD::AssertZext; + if (Opcode) + ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + } + + InVals.push_back(ArgValue); + + } else { // VA.isRegLoc() + + // sanity check + assert(VA.isMemLoc()); + + // The last argument is not a register + ArgRegEnd = 0; + + // The stack pointer offset is relative to the caller stack frame. + // Since the real stack size is unknown here, a negative SPOffset + // is used so there's a way to adjust these offsets when the stack + // size get known (on EliminateFrameIndex). A dummy SPOffset is + // used instead of a direct negative address (which is recorded to + // be used on emitPrologue) to avoid mis-calc of the first stack + // offset on PEI::calculateFrameObjectOffsets. + // Arguments are always 32-bit. + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, 0, true, false); + MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+ + (FirstStackArgLoc + VA.getLocMemOffset()))); + + // Create load nodes to retrieve arguments from the stack + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); + } + } + + // To meet ABI, when VARARGS are passed on registers, the registers + // must have their values written to the caller stack frame. If the last + // argument was placed in the stack, there's no need to save any register. 
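+  // The block below spills the remaining argument registers (ArgRegEnd+1 up
+  // to R10) into fixed stack objects and records the frame index of the
+  // first one in VarArgsFrameIndex for use by LowerVASTART.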
+  if ((isVarArg) && ArgRegEnd) {
+    if (StackPtr.getNode() == 0)
+      StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+    // The last register argument that must be saved is MBlaze::R10
+    TargetRegisterClass *RC = MBlaze::CPURegsRegisterClass;
+
+    unsigned Begin = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R5);
+    unsigned Start = MBlazeRegisterInfo::getRegisterNumbering(ArgRegEnd+1);
+    unsigned End = MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R10);
+    unsigned StackLoc = ArgLocs.size()-1 + (Start - Begin);
+
+    for (; Start <= End; ++Start, ++StackLoc) {
+      unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
+      unsigned LiveReg = MF.addLiveIn(Reg, RC);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
+
+      int FI = MFI->CreateFixedObject(4, 0, true, false);
+      MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
+      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+      OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+                                       false, false, 0));
+
+      // Record the frame index of the first variable argument,
+      // which is needed by VASTART.
+      if (!VarArgsFrameIndex)
+        VarArgsFrameIndex = FI;
+    }
+  }
+
+  // All stores are grouped in one node to allow the matching between
+  // the size of Ins and InVals. This only happens with vararg functions.
+  if (!OutChains.empty()) {
+    OutChains.push_back(Chain);
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &OutChains[0], OutChains.size());
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MBlazeTargetLowering::
+LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+            const SmallVectorImpl<ISD::OutputArg> &Outs,
+            DebugLoc dl, SelectionDAG &DAG) {
+  // CCValAssign - represents the assignment of
+  // the return value to a location.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slots.
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+                 RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+  CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
+
+  // If this is the first return lowered for this function, add
+  // the regs to the liveout set for the function.
+  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+    for (unsigned i = 0; i != RVLocs.size(); ++i)
+      if (RVLocs[i].isRegLoc())
+        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+  }
+
+  SDValue Flag;
+
+  // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Outs[i].Val, Flag); + + // guarantee that all emitted copies are + // stuck together, avoiding something bad + Flag = Chain.getValue(1); + } + + // Return on MBlaze is always a "rtsd R15, 8" + if (Flag.getNode()) + return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other, + Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag); + else // Return Void + return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other, + Chain, DAG.getRegister(MBlaze::R15, MVT::i32)); +} + +//===----------------------------------------------------------------------===// +// MBlaze Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +MBlazeTargetLowering::ConstraintType MBlazeTargetLowering:: +getConstraintType(const std::string &Constraint) const +{ + // MBlaze specific constrainy + // + // 'd' : An address register. Equivalent to r. + // 'y' : Equivalent to r; retained for + // backwards compatibility. + // 'f' : Floating Point registers. + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default : break; + case 'd': + case 'y': + case 'f': + return C_RegisterClass; + break; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"), +/// return a list of registers that can be used to satisfy the constraint. +/// This should only be used for C_RegisterClass constraints. +std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering:: +getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + return std::make_pair(0U, MBlaze::CPURegsRegisterClass); + case 'f': + if (VT == MVT::f32) + return std::make_pair(0U, MBlaze::FGR32RegisterClass); + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +/// Given a register class constraint, like 'r', if this corresponds directly +/// to an LLVM register class, return a register of 0 and the register class +/// pointer. +std::vector<unsigned> MBlazeTargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { + if (Constraint.size() != 1) + return std::vector<unsigned>(); + + switch (Constraint[0]) { + default : break; + case 'r': + // GCC MBlaze Constraint Letters + case 'd': + case 'y': + return make_vector<unsigned>( + MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6, + MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11, + MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21, + MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25, + MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29, + MBlaze::R30, MBlaze::R31, 0); + + case 'f': + return make_vector<unsigned>( + MBlaze::F3, MBlaze::F4, MBlaze::F5, MBlaze::F6, + MBlaze::F7, MBlaze::F9, MBlaze::F10, MBlaze::F11, + MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21, + MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25, + MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29, + MBlaze::F30, MBlaze::F31, 0); + } + return std::vector<unsigned>(); +} + +bool MBlazeTargetLowering:: +isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The MBlaze target isn't yet aware of offsets. 
+ return false; +} + +bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + return VT != MVT::f32; +} diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h new file mode 100644 index 0000000..f8b1470 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -0,0 +1,149 @@ +//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that MBlaze uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef MBlazeISELLOWERING_H +#define MBlazeISELLOWERING_H + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +#include "MBlaze.h" +#include "MBlazeSubtarget.h" + +namespace llvm { + namespace MBlazeCC { + enum CC { + FIRST = 0, + EQ, + NE, + GT, + LT, + GE, + LE + }; + } + + namespace MBlazeISD { + enum NodeType { + // Start the numbering from where ISD NodeType finishes. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Jump and link (call) + JmpLink, + + // Handle gp_rel (small data/bss sections) relocation. + GPRel, + + // Select CC Pseudo Instruction + Select_CC, + + // Wrap up multiple types of instructions + Wrap, + + // Integer Compare + ICmp, + + // Return + Ret + }; + } + + //===--------------------------------------------------------------------===// + // TargetLowering Implementation + //===--------------------------------------------------------------------===// + + class MBlazeTargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. + + public: + + explicit MBlazeTargetLowering(MBlazeTargetMachine &TM); + + /// LowerOperation - Provide custom lowering hooks for some operations. + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// getTargetNodeName - This method returns the name of a target specific + // DAG node. 
+ virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// getSetCCResultType - get the ISD::SETCC result ValueType + MVT::SimpleValueType getSetCCResultType(EVT VT) const; + + virtual unsigned getFunctionAlignment(const Function *F) const; + private: + // Subtarget Info + const MBlazeSubtarget *Subtarget; + + + // Lower Operand helpers + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + // Lower Operand specifics + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals); + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + DebugLoc dl, SelectionDAG &DAG); + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + + // Inline asm support + ConstraintType getConstraintType(const std::string &Constraint) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + std::vector<unsigned> + getRegClassForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// isFPImmLegal - Returns true if the target can instruction select the + /// specified FP immediate natively. If false, the legalizer will + /// materialize the FP immediate as a load from a constant pool. + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + }; +} + +#endif // MBlazeISELLOWERING_H diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td new file mode 100644 index 0000000..a48a8c9 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -0,0 +1,223 @@ +//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze profiles and nodes +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze Operand, Complex Patterns and Transformations Definitions. 
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Memory Access Instructions +//===----------------------------------------------------------------------===// +class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>; + +class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs FGR32:$dst), (ins memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>; + +class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>; + +class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs), (ins FGR32:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>; + +class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>; + +class CmpFN<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>; + +class ArithF2<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class ArithIF<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class ArithFI<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b), + !strconcat(instr_asm, " $dst, $b"), + [], itin>; + +class LogicF<bits<6> op, string instr_asm> : + TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], + IIAlu>; + +class LogicFI<bits<6> op, string instr_asm> : + TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], + IIAlu>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FPU Arithmetic Instructions +//===----------------------------------------------------------------------===// +let Predicates=[HasFPU] in { + def FOR : LogicF<0x28, "or ">; + def FORI : LogicFI<0x28, "ori ">; + def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>; + def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>; + def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>; + def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>; + + def LWF : LoadFM<0x32, "lw ", load>; + def LWFI : LoadFMI<0x32, "lwi ", load>; + + def SWF : 
StoreFM<0x32, "sw ", store>; + def SWFI : StoreFMI<0x32, "swi ", store>; +} + +let Predicates=[HasFPU,HasSqrt] in { + def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>; + def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>; + def FSQRT : ArithF2<0x16, 0x300, "fsqrt ", IIAlu>; +} + +let isAsCheapAsAMove = 1 in { + def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>; + def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>; + def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>; + def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>; + def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>; + def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>; + def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>; +} + + +let usesCustomInserter = 1 in { + def Select_FCC : MBlazePseudo<(outs FGR32:$dst), + (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC), + "; SELECT_FCC PSEUDO!", + []>; +} + +// Floating point conversions +let Predicates=[HasFPU] in { + def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>; + def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>; + def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>; +} + +// SET_CC operations +let Predicates=[HasFPU] in { + def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 1)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_EQ FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (XOR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LT FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_GE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_LE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_NE FGR32:$L, FGR32:$R), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_GT FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_LT FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + 
(OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_GE FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (OR (FCMP_UN FGR32:$L, FGR32:$R), + (FCMP_LE FGR32:$L, FGR32:$R)), 2)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETO), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_UN FGR32:$L, FGR32:$R), 1)>; + def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (FCMP_UN FGR32:$L, FGR32:$R), 2)>; +} + +// SELECT operations +def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F), + (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>; + +//===----------------------------------------------------------------------===// +// Patterns for Floating Point Instructions +//===----------------------------------------------------------------------===// +def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>; diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td new file mode 100644 index 0000000..b59999e --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -0,0 +1,153 @@ +//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// FSL Instruction Formats +//===----------------------------------------------------------------------===// +class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b), + !strconcat(instr_asm, " $dst, $b"), + [(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>; + +class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs CPURegs:$dst), (ins fslimm:$b), + !strconcat(instr_asm, " $dst, $b"), + [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>; + +class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b), + !strconcat(instr_asm, " $v, $b"), + [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>; + +class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs), (ins CPURegs:$v, fslimm:$b), + !strconcat(instr_asm, " $v, $b"), + [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>; + +class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> : + TA<op, flags, (outs), (ins CPURegs:$b), + !strconcat(instr_asm, " $b"), + [(OpNode CPURegs:$b)], IIAlu>; + +class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> : + TAI<op, (outs), (ins fslimm:$b), + !strconcat(instr_asm, " $b"), + [(OpNode immZExt4:$b)], IIAlu>; + +//===----------------------------------------------------------------------===// +// FSL Get Instructions +//===----------------------------------------------------------------------===// +def GET : FSLGet<0x1B, "get ", int_mblaze_fsl_get>; +def AGET : FSLGet<0x1B, "aget ", int_mblaze_fsl_aget>; +def CGET : FSLGet<0x1B, "cget ", int_mblaze_fsl_cget>; +def CAGET : FSLGet<0x1B, "caget ", int_mblaze_fsl_caget>; +def EGET : FSLGet<0x1B, "eget ", int_mblaze_fsl_eget>; +def EAGET : FSLGet<0x1B, "eaget ", int_mblaze_fsl_eaget>; +def ECGET : FSLGet<0x1B, "ecget ", int_mblaze_fsl_ecget>; +def ECAGET : FSLGet<0x1B, "ecaget ", int_mblaze_fsl_ecaget>; +def NGET : FSLGet<0x1B, "nget ", int_mblaze_fsl_nget>; +def NAGET : FSLGet<0x1B, "naget ", 
int_mblaze_fsl_naget>; +def NCGET : FSLGet<0x1B, "ncget ", int_mblaze_fsl_ncget>; +def NCAGET : FSLGet<0x1B, "ncaget ", int_mblaze_fsl_ncaget>; +def NEGET : FSLGet<0x1B, "neget ", int_mblaze_fsl_neget>; +def NEAGET : FSLGet<0x1B, "neaget ", int_mblaze_fsl_neaget>; +def NECGET : FSLGet<0x1B, "necget ", int_mblaze_fsl_necget>; +def NECAGET : FSLGet<0x1B, "necaget ", int_mblaze_fsl_necaget>; +def TGET : FSLGet<0x1B, "tget ", int_mblaze_fsl_tget>; +def TAGET : FSLGet<0x1B, "taget ", int_mblaze_fsl_taget>; +def TCGET : FSLGet<0x1B, "tcget ", int_mblaze_fsl_tcget>; +def TCAGET : FSLGet<0x1B, "tcaget ", int_mblaze_fsl_tcaget>; +def TEGET : FSLGet<0x1B, "teget ", int_mblaze_fsl_teget>; +def TEAGET : FSLGet<0x1B, "teaget ", int_mblaze_fsl_teaget>; +def TECGET : FSLGet<0x1B, "tecget ", int_mblaze_fsl_tecget>; +def TECAGET : FSLGet<0x1B, "tecaget ", int_mblaze_fsl_tecaget>; +def TNGET : FSLGet<0x1B, "tnget ", int_mblaze_fsl_tnget>; +def TNAGET : FSLGet<0x1B, "tnaget ", int_mblaze_fsl_tnaget>; +def TNCGET : FSLGet<0x1B, "tncget ", int_mblaze_fsl_tncget>; +def TNCAGET : FSLGet<0x1B, "tncaget ", int_mblaze_fsl_tncaget>; +def TNEGET : FSLGet<0x1B, "tneget ", int_mblaze_fsl_tneget>; +def TNEAGET : FSLGet<0x1B, "tneaget ", int_mblaze_fsl_tneaget>; +def TNECGET : FSLGet<0x1B, "tnecget ", int_mblaze_fsl_tnecget>; +def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>; + +//===----------------------------------------------------------------------===// +// FSL Dynamic Get Instructions +//===----------------------------------------------------------------------===// +def GETD : FSLGetD<0x1B, 0x00, "getd ", int_mblaze_fsl_get>; +def AGETD : FSLGetD<0x1B, 0x00, "agetd ", int_mblaze_fsl_aget>; +def CGETD : FSLGetD<0x1B, 0x00, "cgetd ", int_mblaze_fsl_cget>; +def CAGETD : FSLGetD<0x1B, 0x00, "cagetd ", int_mblaze_fsl_caget>; +def EGETD : FSLGetD<0x1B, 0x00, "egetd ", int_mblaze_fsl_eget>; +def EAGETD : FSLGetD<0x1B, 0x00, "eagetd ", int_mblaze_fsl_eaget>; +def ECGETD : FSLGetD<0x1B, 0x00, "ecgetd ", int_mblaze_fsl_ecget>; +def ECAGETD : FSLGetD<0x1B, 0x00, "ecagetd ", int_mblaze_fsl_ecaget>; +def NGETD : FSLGetD<0x1B, 0x00, "ngetd ", int_mblaze_fsl_nget>; +def NAGETD : FSLGetD<0x1B, 0x00, "nagetd ", int_mblaze_fsl_naget>; +def NCGETD : FSLGetD<0x1B, 0x00, "ncgetd ", int_mblaze_fsl_ncget>; +def NCAGETD : FSLGetD<0x1B, 0x00, "ncagetd ", int_mblaze_fsl_ncaget>; +def NEGETD : FSLGetD<0x1B, 0x00, "negetd ", int_mblaze_fsl_neget>; +def NEAGETD : FSLGetD<0x1B, 0x00, "neagetd ", int_mblaze_fsl_neaget>; +def NECGETD : FSLGetD<0x1B, 0x00, "necgetd ", int_mblaze_fsl_necget>; +def NECAGETD : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>; +def TGETD : FSLGetD<0x1B, 0x00, "tgetd ", int_mblaze_fsl_tget>; +def TAGETD : FSLGetD<0x1B, 0x00, "tagetd ", int_mblaze_fsl_taget>; +def TCGETD : FSLGetD<0x1B, 0x00, "tcgetd ", int_mblaze_fsl_tcget>; +def TCAGETD : FSLGetD<0x1B, 0x00, "tcagetd ", int_mblaze_fsl_tcaget>; +def TEGETD : FSLGetD<0x1B, 0x00, "tegetd ", int_mblaze_fsl_teget>; +def TEAGETD : FSLGetD<0x1B, 0x00, "teagetd ", int_mblaze_fsl_teaget>; +def TECGETD : FSLGetD<0x1B, 0x00, "tecgetd ", int_mblaze_fsl_tecget>; +def TECAGETD : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>; +def TNGETD : FSLGetD<0x1B, 0x00, "tngetd ", int_mblaze_fsl_tnget>; +def TNAGETD : FSLGetD<0x1B, 0x00, "tnagetd ", int_mblaze_fsl_tnaget>; +def TNCGETD : FSLGetD<0x1B, 0x00, "tncgetd ", int_mblaze_fsl_tncget>; +def TNCAGETD : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>; +def TNEGETD : FSLGetD<0x1B, 0x00, "tnegetd 
", int_mblaze_fsl_tneget>; +def TNEAGETD : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>; +def TNECGETD : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>; +def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>; + +//===----------------------------------------------------------------------===// +// FSL Put Instructions +//===----------------------------------------------------------------------===// +def PUT : FSLPut<0x1B, "put ", int_mblaze_fsl_put>; +def APUT : FSLPut<0x1B, "aput ", int_mblaze_fsl_aput>; +def CPUT : FSLPut<0x1B, "cput ", int_mblaze_fsl_cput>; +def CAPUT : FSLPut<0x1B, "caput ", int_mblaze_fsl_caput>; +def NPUT : FSLPut<0x1B, "nput ", int_mblaze_fsl_nput>; +def NAPUT : FSLPut<0x1B, "naput ", int_mblaze_fsl_naput>; +def NCPUT : FSLPut<0x1B, "ncput ", int_mblaze_fsl_ncput>; +def NCAPUT : FSLPut<0x1B, "ncaput ", int_mblaze_fsl_ncaput>; +def TPUT : FSLPutT<0x1B, "tput ", int_mblaze_fsl_tput>; +def TAPUT : FSLPutT<0x1B, "taput ", int_mblaze_fsl_taput>; +def TCPUT : FSLPutT<0x1B, "tcput ", int_mblaze_fsl_tcput>; +def TCAPUT : FSLPutT<0x1B, "tcaput ", int_mblaze_fsl_tcaput>; +def TNPUT : FSLPutT<0x1B, "tnput ", int_mblaze_fsl_tnput>; +def TNAPUT : FSLPutT<0x1B, "tnaput ", int_mblaze_fsl_tnaput>; +def TNCPUT : FSLPutT<0x1B, "tncput ", int_mblaze_fsl_tncput>; +def TNCAPUT : FSLPutT<0x1B, "tncaput ", int_mblaze_fsl_tncaput>; + +//===----------------------------------------------------------------------===// +// FSL Dynamic Put Instructions +//===----------------------------------------------------------------------===// +def PUTD : FSLPutD<0x1B, 0x00, "putd ", int_mblaze_fsl_put>; +def APUTD : FSLPutD<0x1B, 0x00, "aputd ", int_mblaze_fsl_aput>; +def CPUTD : FSLPutD<0x1B, 0x00, "cputd ", int_mblaze_fsl_cput>; +def CAPUTD : FSLPutD<0x1B, 0x00, "caputd ", int_mblaze_fsl_caput>; +def NPUTD : FSLPutD<0x1B, 0x00, "nputd ", int_mblaze_fsl_nput>; +def NAPUTD : FSLPutD<0x1B, 0x00, "naputd ", int_mblaze_fsl_naput>; +def NCPUTD : FSLPutD<0x1B, 0x00, "ncputd ", int_mblaze_fsl_ncput>; +def NCAPUTD : FSLPutD<0x1B, 0x00, "ncaputd ", int_mblaze_fsl_ncaput>; +def TPUTD : FSLPutTD<0x1B, 0x00, "tputd ", int_mblaze_fsl_tput>; +def TAPUTD : FSLPutTD<0x1B, 0x00, "taputd ", int_mblaze_fsl_taput>; +def TCPUTD : FSLPutTD<0x1B, 0x00, "tcputd ", int_mblaze_fsl_tcput>; +def TCAPUTD : FSLPutTD<0x1B, 0x00, "tcaputd ", int_mblaze_fsl_tcaput>; +def TNPUTD : FSLPutTD<0x1B, 0x00, "tnputd ", int_mblaze_fsl_tnput>; +def TNAPUTD : FSLPutTD<0x1B, 0x00, "tnaputd ", int_mblaze_fsl_tnaput>; +def TNCPUTD : FSLPutTD<0x1B, 0x00, "tncputd ", int_mblaze_fsl_tncput>; +def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>; diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td new file mode 100644 index 0000000..7d65543 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -0,0 +1,246 @@ +//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe MBlaze instructions format +// +// CPU INSTRUCTION FORMATS +// +// opcode - operation code. +// rd - dst reg. +// ra - first src. reg. +// rb - second src. reg. +// imm16 - 16-bit immediate value. 
+// +//===----------------------------------------------------------------------===// + +// Generic MBlaze Format +class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin> : Instruction +{ + field bits<32> Inst; + + let Namespace = "MBlaze"; + + bits<6> opcode; + + // Top 6 bits are the 'opcode' field + let Inst{0-5} = opcode; + + dag OutOperandList = outs; + dag InOperandList = ins; + + let AsmString = asmstr; + let Pattern = pattern; + let Itinerary = itin; +} + +//===----------------------------------------------------------------------===// +// Pseudo instruction class +//===----------------------------------------------------------------------===// +class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>: + MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>; + +//===----------------------------------------------------------------------===// +// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|> +//===----------------------------------------------------------------------===// + +class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<5> rb; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-20} = rb; + let Inst{21-31} = flags; +} + +class TAI<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +class TIMM<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-15} = 0; + let Inst{16-31} = imm16; +} + +class TADDR<bits<6> op, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<26> addr; + + let opcode = op; + + let Inst{6-31} = addr; +} + +//===----------------------------------------------------------------------===// +// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|> +//===----------------------------------------------------------------------===// + +class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, + InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +//===----------------------------------------------------------------------===// +// Float instruction class in MBlaze : <|opcode|rd|ra|flags|> +//===----------------------------------------------------------------------===// + +class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr, + list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> rd; + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = rd; + let Inst{11-15} = ra; + let Inst{16-20} = 0; + let Inst{21-31} = flags; +} + +//===----------------------------------------------------------------------===// +// Branch instruction class in MBlaze : <|opcode|rd|br|ra|flags|> +//===----------------------------------------------------------------------===// + +class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag 
ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = 0; + let Inst{11-15} = br; + let Inst{16-20} = ra; + let Inst{21-31} = flags; +} + +class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<5> rb; + + let opcode = op; + + let Inst{6-10} = br; + let Inst{11-15} = ra; + let Inst{16-20} = rb; + let Inst{21-31} = flags; +} + +class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + + let opcode = op; + + let Inst{6-10} = 0xF; + let Inst{11-15} = br; + let Inst{16-20} = ra; + let Inst{21-31} = flags; +} + +class TBRI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0; + let Inst{11-15} = br; + let Inst{16-31} = imm16; +} + +class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0xF; + let Inst{11-15} = br; + let Inst{16-31} = imm16; +} + +class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = br; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} + +class TRET<bits<6> op, dag outs, dag ins, + string asmstr, list<dag> pattern, InstrItinClass itin> : + MBlazeInst<outs, ins, asmstr, pattern, itin> +{ + bits<5> ra; + bits<16> imm16; + + let opcode = op; + + let Inst{6-10} = 0x10; + let Inst{11-15} = ra; + let Inst{16-31} = imm16; +} diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp new file mode 100644 index 0000000..a7e8eb7 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -0,0 +1,222 @@ +//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeInstrInfo.h" +#include "MBlazeTargetMachine.h" +#include "MBlazeMachineFunction.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "MBlazeGenInstrInfo.inc" + +using namespace llvm; + +MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm) + : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)), + TM(tm), RI(*TM.getSubtargetImpl(), *this) {} + +static bool isZeroImm(const MachineOperand &op) { + return op.isImm() && op.getImm() == 0; +} + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. 
+bool MBlazeInstrInfo:: +isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. + + // add $dst, $src, $zero || addu $dst, $zero, $src + // or $dst, $src, $zero || or $dst, $zero, $src + if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) { + if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + return true; + } else if (MI.getOperand(2).isReg() && + MI.getOperand(2).getReg() == MBlaze::R0) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } + + // addi $dst, $src, 0 + // ori $dst, $src, 0 + if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) { + if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } + } + + return false; +} + +/// isLoadFromStackSlot - If the specified machine instruction is a direct +/// load from a stack slot, return the virtual or physical register number of +/// the destination along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than loading from the stack slot. +unsigned MBlazeInstrInfo:: +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { + if (MI->getOpcode() == MBlaze::LWI) { + if ((MI->getOperand(2).isFI()) && // is a stack slot + (MI->getOperand(1).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(1)))) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + } + + return 0; +} + +/// isStoreToStackSlot - If the specified machine instruction is a direct +/// store to a stack slot, return the virtual or physical register number of +/// the source reg along with the FrameIndex of the loaded stack slot. If +/// not, return 0. This predicate must return 0 if the instruction has +/// any side effects other than storing to the stack slot. +unsigned MBlazeInstrInfo:: +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { + if (MI->getOpcode() == MBlaze::SWI) { + if ((MI->getOperand(2).isFI()) && // is a stack slot + (MI->getOperand(1).isImm()) && // the imm is zero + (isZeroImm(MI->getOperand(1)))) { + FrameIndex = MI->getOperand(2).getIndex(); + return MI->getOperand(0).getReg(); + } + } + return 0; +} + +/// insertNoop - If data hazard condition is found insert the target nop +/// instruction. 
+void MBlazeInstrInfo:: +insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + BuildMI(MBB, MI, DL, get(MBlaze::NOP)); +} + +bool MBlazeInstrInfo:: +copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + llvm::BuildMI(MBB, I, dl, get(MBlaze::ADD), DestReg) + .addReg(SrcReg).addReg(MBlaze::R0); + return true; +} + +void MBlazeInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + BuildMI(MBB, I, dl, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); +} + +void MBlazeInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + BuildMI(MBB, I, dl, get(MBlaze::LWI), DestReg) + .addImm(0).addFrameIndex(FI); +} + +MachineInstr *MBlazeInstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + MachineInstr *NewMI = NULL; + + switch (MI->getOpcode()) { + case MBlaze::OR: + case MBlaze::ADD: + if ((MI->getOperand(0).isReg()) && + (MI->getOperand(2).isReg()) && + (MI->getOperand(2).getReg() == MBlaze::R0) && + (MI->getOperand(1).isReg())) { + if (Ops[0] == 0) { // COPY -> STORE + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + bool isUndef = MI->getOperand(1).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addImm(0).addFrameIndex(FI); + } else { // COPY -> LOAD + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + bool isUndef = MI->getOperand(0).isUndef(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | + getUndefRegState(isUndef)) + .addImm(0).addFrameIndex(FI); + } + } + break; + } + + return NewMI; +} + +//===----------------------------------------------------------------------===// +// Branch Analysis +//===----------------------------------------------------------------------===// +unsigned MBlazeInstrInfo:: +InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const { + DebugLoc dl = DebugLoc::getUnknownLoc(); + + // Can only insert uncond branches so far. + assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); + BuildMI(&MBB, dl, get(MBlaze::BRI)).addMBB(TBB); + return 1; +} + +/// getGlobalBaseReg - Return a virtual register initialized with the +/// the global base register value. Output instructions required to +/// initialize the register in the function entry block, if necessary. 
+/// +unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { + MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>(); + unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg(); + if (GlobalBaseReg != 0) + return GlobalBaseReg; + + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = MF->front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + + GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass); + bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20, + MBlaze::CPURegsRegisterClass, + MBlaze::CPURegsRegisterClass); + assert(Ok && "Couldn't assign to global base register!"); + Ok = Ok; // Silence warning when assertions are turned off. + RegInfo.addLiveIn(MBlaze::R20); + + MBlazeFI->setGlobalBaseReg(GlobalBaseReg); + return GlobalBaseReg; +} diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h new file mode 100644 index 0000000..4f79f1c --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -0,0 +1,242 @@ +//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZEINSTRUCTIONINFO_H +#define MBLAZEINSTRUCTIONINFO_H + +#include "MBlaze.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "MBlazeRegisterInfo.h" + +namespace llvm { + +namespace MBlaze { + + // MBlaze Branch Codes + enum FPBranchCode { + BRANCH_F, + BRANCH_T, + BRANCH_FL, + BRANCH_TL, + BRANCH_INVALID + }; + + // MBlaze Condition Codes + enum CondCode { + // To be used with float branch True + FCOND_F, + FCOND_UN, + FCOND_EQ, + FCOND_UEQ, + FCOND_OLT, + FCOND_ULT, + FCOND_OLE, + FCOND_ULE, + FCOND_SF, + FCOND_NGLE, + FCOND_SEQ, + FCOND_NGL, + FCOND_LT, + FCOND_NGE, + FCOND_LE, + FCOND_NGT, + + // To be used with float branch False + // This conditions have the same mnemonic as the + // above ones, but are used with a branch False; + FCOND_T, + FCOND_OR, + FCOND_NEQ, + FCOND_OGL, + FCOND_UGE, + FCOND_OGE, + FCOND_UGT, + FCOND_OGT, + FCOND_ST, + FCOND_GLE, + FCOND_SNE, + FCOND_GL, + FCOND_NLT, + FCOND_GE, + FCOND_NLE, + FCOND_GT, + + // Only integer conditions + COND_E, + COND_GZ, + COND_GEZ, + COND_LZ, + COND_LEZ, + COND_NE, + COND_INVALID + }; + + // Turn condition code into conditional branch opcode. + unsigned GetCondBranchFromCond(CondCode CC); + + /// GetOppositeBranchCondition - Return the inverse of the specified cond, + /// e.g. turning COND_E to COND_NE. 
+ CondCode GetOppositeBranchCondition(MBlaze::CondCode CC); + + /// MBlazeCCToString - Map each FP condition code to its string + inline static const char *MBlazeFCCToString(MBlaze::CondCode CC) + { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case FCOND_F: + case FCOND_T: return "f"; + case FCOND_UN: + case FCOND_OR: return "un"; + case FCOND_EQ: + case FCOND_NEQ: return "eq"; + case FCOND_UEQ: + case FCOND_OGL: return "ueq"; + case FCOND_OLT: + case FCOND_UGE: return "olt"; + case FCOND_ULT: + case FCOND_OGE: return "ult"; + case FCOND_OLE: + case FCOND_UGT: return "ole"; + case FCOND_ULE: + case FCOND_OGT: return "ule"; + case FCOND_SF: + case FCOND_ST: return "sf"; + case FCOND_NGLE: + case FCOND_GLE: return "ngle"; + case FCOND_SEQ: + case FCOND_SNE: return "seq"; + case FCOND_NGL: + case FCOND_GL: return "ngl"; + case FCOND_LT: + case FCOND_NLT: return "lt"; + case FCOND_NGE: + case FCOND_GE: return "ge"; + case FCOND_LE: + case FCOND_NLE: return "nle"; + case FCOND_NGT: + case FCOND_GT: return "gt"; + } + } +} + +/// MBlazeII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace MBlazeII { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // MBlaze Specific MachineOperand flags. + MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + MO_GOT, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. + MO_GOT_CALL, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + MO_GPREL, + + /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HILO + + }; +} + +class MBlazeInstrInfo : public TargetInstrInfoImpl { + MBlazeTargetMachine &TM; + const MBlazeRegisterInfo RI; +public: + explicit MBlazeInstrInfo(MBlazeTargetMachine &TM); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. + virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// load from a stack slot, return the virtual or physical register number of + /// the destination along with the FrameIndex of the loaded stack slot. If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than loading from the stack slot. + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// isStoreToStackSlot - If the specified machine instruction is a direct + /// store to a stack slot, return the virtual or physical register number of + /// the source reg along with the FrameIndex of the loaded stack slot. 
If + /// not, return 0. This predicate must return 0 if the instruction has + /// any side effects other than storing to the stack slot. + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + /// Branch Analysis + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + /// Insert nop instruction when hazard condition is found + virtual void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + /// getGlobalBaseReg - Return a virtual register initialized with the + /// the global base register value. Output instructions required to + /// initialize the register in the function entry block, if necessary. + /// + unsigned getGlobalBaseReg(MachineFunction *MF) const; +}; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td new file mode 100644 index 0000000..3c406dd --- /dev/null +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -0,0 +1,672 @@ +//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// +include "MBlazeInstrFormats.td" + +//===----------------------------------------------------------------------===// +// MBlaze profiles and nodes +//===----------------------------------------------------------------------===// +def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; + +// Call +def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink, + [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>; + +// Return +def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet, + [SDNPHasChain, SDNPOptInFlag]>; + +// Hi and Lo nodes are used to handle global addresses. Used on +// MBlazeISelLowering to lower stuff like GlobalAddress, ExternalSymbol +// static model. 
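+// (For reference, the patterns near the end of this file select a wrapped
+// address by OR-ing it into R0, e.g.
+//   def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>;
+// and similarly for jump tables and constant pools.)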
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>; +def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>; + +def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// These are target-independent nodes, but have target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart, + [SDNPHasChain, SDNPOutFlag]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>; + +//===----------------------------------------------------------------------===// +// MBlaze Instruction Predicate Definitions. +//===----------------------------------------------------------------------===// +def HasPipe3 : Predicate<"Subtarget.hasPipe3()">; +def HasBarrel : Predicate<"Subtarget.hasBarrel()">; +def NoBarrel : Predicate<"!Subtarget.hasBarrel()">; +def HasDiv : Predicate<"Subtarget.hasDiv()">; +def HasMul : Predicate<"Subtarget.hasMul()">; +def HasFSL : Predicate<"Subtarget.hasFSL()">; +def HasEFSL : Predicate<"Subtarget.hasEFSL()">; +def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">; +def HasException : Predicate<"Subtarget.hasException()">; +def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">; +def HasFPU : Predicate<"Subtarget.hasFPU()">; +def HasESR : Predicate<"Subtarget.hasESR()">; +def HasPVR : Predicate<"Subtarget.hasPVR()">; +def HasMul64 : Predicate<"Subtarget.hasMul64()">; +def HasSqrt : Predicate<"Subtarget.hasSqrt()">; +def HasMMU : Predicate<"Subtarget.hasMMU()">; + +//===----------------------------------------------------------------------===// +// MBlaze Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +// Instruction operand types +def brtarget : Operand<OtherVT>; +def calltarget : Operand<i32>; +def simm16 : Operand<i32>; +def uimm5 : Operand<i32>; +def fimm : Operand<f32>; + +// Unsigned Operand +def uimm16 : Operand<i32> { + let PrintMethod = "printUnsignedImm"; +} + +// FSL Operand +def fslimm : Operand<i32> { + let PrintMethod = "printFSLImm"; +} + +// Address operand +def memri : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops simm16, CPURegs); +} + +def memrr : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops CPURegs, CPURegs); +} + +// Transformation Function - get the lower 16 bits. +def LO16 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF); +}]>; + +// Transformation Function - get the higher 16 bits. +def HI16 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() >> 16); +}]>; + +// Node immediate fits as 16-bit sign extended on target immediate. +// e.g. addi, andi +def immSExt16 : PatLeaf<(imm), [{ + return (N->getZExtValue() >> 16) == 0; +}]>; + +// Node immediate fits as 16-bit zero extended on target immediate. +// The LO16 param means that only the lower 16 bits of the node +// immediate are caught. +// e.g. addiu, sltiu +def immZExt16 : PatLeaf<(imm), [{ + return (N->getZExtValue() >> 16) == 0; +}], LO16>; + +// FSL immediate field must fit in 4 bits. +def immZExt4 : PatLeaf<(imm), [{ + return N->getZExtValue() == ((N->getZExtValue()) & 0xf) ; +}]>; + +// shamt field must fit in 5 bits. +def immZExt5 : PatLeaf<(imm), [{ + return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ; +}]>; + +// MBlaze Address Mode! 
SDNode frameindex could possibly be a match +// since load and store instructions from the stack use it. +def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>; +def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>; + +//===----------------------------------------------------------------------===// +// Pseudo instructions +//===----------------------------------------------------------------------===// + +// As stack alignment is always done with addi, we need a 16-bit immediate +let Defs = [R1], Uses = [R1] in { +def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt), + "${:comment} ADJCALLSTACKDOWN $amt", + [(callseq_start timm:$amt)]>; +def ADJCALLSTACKUP : MBlazePseudo<(outs), + (ins uimm16:$amt1, simm16:$amt2), + "${:comment} ADJCALLSTACKUP $amt1", + [(callseq_end timm:$amt1, timm:$amt2)]>; +} + +// Some assembly macros need to avoid pseudoinstructions and assembler +// automatic reordering; we should reorder ourselves. +def MACRO : MBlazePseudo<(outs), (ins), ".set macro", []>; +def REORDER : MBlazePseudo<(outs), (ins), ".set reorder", []>; +def NOMACRO : MBlazePseudo<(outs), (ins), ".set nomacro", []>; +def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>; + +// When handling PIC code the assembler needs .cpload and .cprestore +// directives. If the real instructions corresponding to these directives +// are used, we have the same behavior, but also get a bunch of warnings +// from the assembler. +def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>; +def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>; + +//===----------------------------------------------------------------------===// +// Instructions specific format +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions +//===----------------------------------------------------------------------===// +class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + +class ArithI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>; + +class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>; + +class ArithRI<bits<6> op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins Od:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $c, $b"), + [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>; + +class ArithN<bits<6> op, bits<11> flags, string instr_asm, + InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithNI<bits<6> op, string instr_asm, Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +class ArithRN<bits<6> op, bits<11> flags, string instr_asm, + 
InstrItinClass itin> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $b, $c"), + [], itin>; + +class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> : + TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b), + !strconcat(instr_asm, " $dst, $b, $c"), + [], IIAlu>; + +//===----------------------------------------------------------------------===// +// Misc Arithmetic Instructions +//===----------------------------------------------------------------------===// + +class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> : + TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>; + +class LogicI<bits<6> op, string instr_asm, SDNode OpNode> : + TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c), + !strconcat(instr_asm, " $dst, $b, $c"), + [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))], + IIAlu>; + +class EffectiveAddress<string instr_asm> : + TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr), + instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>; + +//===----------------------------------------------------------------------===// +// Memory Access Instructions +//===----------------------------------------------------------------------===// +class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>; + +class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs CPURegs:$dst), (ins memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>; + +class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> : + TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>; + +class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> : + TAI<op, (outs), (ins CPURegs:$dst, memri:$addr), + !strconcat(instr_asm, " $dst, $addr"), + [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>; + +//===----------------------------------------------------------------------===// +// Branch Instructions +//===----------------------------------------------------------------------===// +class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : + TBR<op, br, flags, (outs), (ins CPURegs:$target), + !strconcat(instr_asm, " $target"), + [(brind CPURegs:$target)], IIBranch>; + +class BranchI<bits<6> op, bits<5> brf, string instr_asm> : + TBRI<op, brf, (outs), (ins brtarget:$target), + !strconcat(instr_asm, " $target"), + [(br bb:$target)], IIBranch>; + +//===----------------------------------------------------------------------===// +// Branch and Link Instructions +//===----------------------------------------------------------------------===// +class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> : + TBRL<op, br, flags, (outs), (ins CPURegs:$target), + !strconcat(instr_asm, " r15, $target"), + [], IIBranch>; + +class BranchLI<bits<6> op, bits<5> br, string instr_asm> : + TBRLI<op, br, (outs), (ins calltarget:$target), + !strconcat(instr_asm, " r15, $target"), + [], IIBranch>; + +//===----------------------------------------------------------------------===// +// Conditional Branch Instructions +//===----------------------------------------------------------------------===// +class 
BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm, + PatFrag cond_op> : + TBRC<op, br, flags, (outs), + (ins CPURegs:$a, CPURegs:$b, brtarget:$offset), + !strconcat(instr_asm, " $a, $b, $offset"), + [], IIBranch>; + //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)], + //IIBranch>; + +class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> : + TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset), + !strconcat(instr_asm, " $a, $offset"), + [], IIBranch>; + +//===----------------------------------------------------------------------===// +// MBlaze arithmetic instructions +//===----------------------------------------------------------------------===// + +let isCommutable = 1, isAsCheapAsAMove = 1 in { + def ADD : Arith<0x00, 0x000, "add ", add, IIAlu>; + def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>; + def ADDK : Arith<0x04, 0x000, "addk ", addc, IIAlu>; + def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>; + def AND : Logic<0x21, 0x000, "and ", and>; + def OR : Logic<0x20, 0x000, "or ", or>; + def XOR : Logic<0x22, 0x000, "xor ", xor>; +} + +let isAsCheapAsAMove = 1 in { + def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>; + def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>; + def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>; + def RSUB : ArithR<0x01, 0x000, "rsub ", sub, IIAlu>; + def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>; + def RSUBK : ArithR<0x05, 0x000, "rsubk ", subc, IIAlu>; + def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>; +} + +let isCommutable = 1, Predicates=[HasMul] in { + def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>; +} + +let isCommutable = 1, Predicates=[HasMul,HasMul64] in { + def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>; + def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>; +} + +let Predicates=[HasMul,HasMul64] in { + def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>; +} + +let Predicates=[HasBarrel] in { + def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>; + def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>; + def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>; + def BSRLI : ArithI<0x11, "bsrli ", srl, uimm5, immZExt5>; + def BSRAI : ArithI<0x11, "bsrai ", sra, uimm5, immZExt5>; + def BSLLI : ArithI<0x11, "bslli ", shl, uimm5, immZExt5>; +} + +let Predicates=[HasDiv] in { + def IDIV : Arith<0x12, 0x000, "idiv ", sdiv, IIAlu>; + def IDIVU : Arith<0x12, 0x002, "idivu ", udiv, IIAlu>; +} + +//===----------------------------------------------------------------------===// +// MBlaze immediate mode arithmetic instructions +//===----------------------------------------------------------------------===// + +let isAsCheapAsAMove = 1 in { + def ADDI : ArithI<0x08, "addi ", add, simm16, immSExt16>; + def ADDIC : ArithNI<0x0A, "addic ", simm16, immSExt16>; + def ADDIK : ArithNI<0x0C, "addik ", simm16, immSExt16>; + def ADDIKC : ArithI<0x0E, "addikc ", addc, simm16, immSExt16>; + def RSUBI : ArithRI<0x09, "rsubi ", sub, simm16, immSExt16>; + def RSUBIC : ArithRNI<0x0B, "rsubi ", simm16, immSExt16>; + def RSUBIK : ArithRNI<0x0E, "rsubic ", simm16, immSExt16>; + def RSUBIKC : ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>; + def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>; + def ANDI : LogicI<0x29, "andi ", and>; + def ORI : LogicI<0x28, "ori ", or>; + def XORI : LogicI<0x2A, "xori ", xor>; +} + +let Predicates=[HasMul] in { + def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>; +} + +//===----------------------------------------------------------------------===// +// MBlaze memory access 
instructions +//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def LBU : LoadM<0x30, "lbu ", zextloadi8>; + def LHU : LoadM<0x31, "lhu ", zextloadi16>; + def LW : LoadM<0x32, "lw ", load>; + + def LBUI : LoadMI<0x30, "lbui ", zextloadi8>; + def LHUI : LoadMI<0x31, "lhui ", zextloadi16>; + def LWI : LoadMI<0x32, "lwi ", load>; +} + + def SB : StoreM<0x34, "sb ", truncstorei8>; + def SH : StoreM<0x35, "sh ", truncstorei16>; + def SW : StoreM<0x36, "sw ", store>; + + def SBI : StoreMI<0x34, "sbi ", truncstorei8>; + def SHI : StoreMI<0x35, "shi ", truncstorei16>; + def SWI : StoreMI<0x36, "swi ", store>; + +//===----------------------------------------------------------------------===// +// MBlaze branch instructions +//===----------------------------------------------------------------------===// + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + def BRI : BranchI<0x2E, 0x00, "bri ">; + def BRAI : BranchI<0x2E, 0x08, "brai ">; + def BEQI : BranchCI<0x2F, 0x00, "beqi ", seteq>; + def BNEI : BranchCI<0x2F, 0x01, "bnei ", setne>; + def BLTI : BranchCI<0x2F, 0x02, "blti ", setlt>; + def BLEI : BranchCI<0x2F, 0x03, "blei ", setle>; + def BGTI : BranchCI<0x2F, 0x04, "bgti ", setgt>; + def BGEI : BranchCI<0x2F, 0x05, "bgei ", setge>; +} + +let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + def BR : Branch<0x26, 0x00, 0x000, "br ">; + def BRA : Branch<0x26, 0x08, 0x000, "bra ">; + def BEQ : BranchC<0x27, 0x00, 0x000, "beq ", seteq>; + def BNE : BranchC<0x27, 0x01, 0x000, "bne ", setne>; + def BLT : BranchC<0x27, 0x02, 0x000, "blt ", setlt>; + def BLE : BranchC<0x27, 0x03, 0x000, "ble ", setle>; + def BGT : BranchC<0x27, 0x04, 0x000, "bgt ", setgt>; + def BGE : BranchC<0x27, 0x05, 0x000, "bge ", setge>; +} + +let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in { + def BRID : BranchI<0x2E, 0x10, "brid ">; + def BRAID : BranchI<0x2E, 0x18, "braid ">; + def BEQID : BranchCI<0x2F, 0x10, "beqid ", seteq>; + def BNEID : BranchCI<0x2F, 0x11, "bneid ", setne>; + def BLTID : BranchCI<0x2F, 0x12, "bltid ", setlt>; + def BLEID : BranchCI<0x2F, 0x13, "bleid ", setle>; + def BGTID : BranchCI<0x2F, 0x14, "bgtid ", setgt>; + def BGEID : BranchCI<0x2F, 0x15, "bgeid ", setge>; +} + +let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, + hasDelaySlot = 1, hasCtrlDep = 1 in { + def BRD : Branch<0x26, 0x10, 0x000, "brd ">; + def BRAD : Branch<0x26, 0x18, 0x000, "brad ">; + def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ", seteq>; + def BNED : BranchC<0x27, 0x11, 0x000, "bned ", setne>; + def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ", setlt>; + def BLED : BranchC<0x27, 0x13, 0x000, "bled ", setle>; + def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ", setgt>; + def BGED : BranchC<0x27, 0x15, 0x000, "bged ", setge>; +} + +let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRL : BranchL<0x26, 0x04, 0x000, "brl ">; + def BRAL : BranchL<0x26, 0x0C, 0x000, "bral ">; +} + +let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRLID : BranchLI<0x2E, 0x14, "brlid ">; + def BRALID : BranchLI<0x2E, 0x1C, "bralid ">; +} + +let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1, + Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12], + Uses = [R1,R5,R6,R7,R8,R9,R10] in { + def BRLD : BranchL<0x26, 0x14, 0x000, 
"brld ">; + def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">; +} + +let isReturn=1, isTerminator=1, hasDelaySlot=1, + isBarrier=1, hasCtrlDep=1, imm16=0x8 in { + def RTSD : TRET<0x2D, (outs), (ins CPURegs:$target), + "rtsd $target, 8", + [(MBlazeRet CPURegs:$target)], + IIBranch>; +} + +//===----------------------------------------------------------------------===// +// MBlaze misc instructions +//===----------------------------------------------------------------------===// + +let addr = 0 in { + def NOP : TADDR<0x00, (outs), (ins), "nop ", [], IIAlu>; +} + +let usesCustomInserter = 1 in { + //class PseudoSelCC<RegisterClass RC, string asmstr>: + // MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr, + // [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>; + //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">; + + def Select_CC : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC), + "; SELECT_CC PSEUDO!", + []>; + + def ShiftL : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftL PSEUDO!", + []>; + + def ShiftRA : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftRA PSEUDO!", + []>; + + def ShiftRL : MBlazePseudo<(outs CPURegs:$dst), + (ins CPURegs:$L, CPURegs:$R), + "; ShiftRL PSEUDO!", + []>; +} + + +let rb = 0 in { + def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src), + "sext16 $dst, $src", [], IIAlu>; + def SEXT8 : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src), + "sext8 $dst, $src", [], IIAlu>; + def SRL : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src), + "srl $dst, $src", [], IIAlu>; + def SRA : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src), + "sra $dst, $src", [], IIAlu>; + def SRC : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src), + "src $dst, $src", [], IIAlu>; +} + +def LEA_ADDI : EffectiveAddress<"addi $dst, ${addr:stackloc}">; + +//===----------------------------------------------------------------------===// +// Arbitrary patterns that map to one or more instructions +//===----------------------------------------------------------------------===// + +// Small immediates +def : Pat<(i32 0), (ADD R0, R0)>; +def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>; +def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>; + +// Arbitrary immediates +def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>; + +// In register sign extension +def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>; +def : Pat<(sext_inreg CPURegs:$src, i8), (SEXT8 CPURegs:$src)>; + +// Call +def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>; +def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>; +def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>; + +// Shift Instructions +def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>; +def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>; +def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>; + +// SET_CC operations +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 1)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 2)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT), + (Select_CC (ADDI R0, 1), 
(ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMP CPURegs:$L, CPURegs:$R), 6)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE), + (Select_CC (ADDI R0, 1), (ADDI R0, 0), + (CMPU CPURegs:$L, CPURegs:$R), 6)>; + +// SELECT operations +def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F), + (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>; + +// SELECT_CC +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>; +def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE), + (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>; + +// BRCOND instructions +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T), + (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T), + (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T), + (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T), + (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T), + (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T), + (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T), + (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T), + (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T), + (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond 
(setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T), + (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>; +def : Pat<(brcond CPURegs:$C, bb:$T), + (BNEID CPURegs:$C, bb:$T)>; + +// Jump tables, global addresses, and constant pools +def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>; +def : Pat<(MBWrapper tjumptable:$in), (ORI R0, tjumptable:$in)>; +def : Pat<(MBWrapper tconstpool:$in), (ORI R0, tconstpool:$in)>; + +// Misc instructions +def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>; + +// Arithmetic with immediates +def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>; +def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>; +def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>; + +// extended load and stores +def : Pat<(extloadi1 iaddr:$src), (LBUI iaddr:$src)>; +def : Pat<(extloadi8 iaddr:$src), (LBUI iaddr:$src)>; +def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>; +def : Pat<(extloadi1 xaddr:$src), (LBU xaddr:$src)>; +def : Pat<(extloadi8 xaddr:$src), (LBU xaddr:$src)>; +def : Pat<(extloadi16 xaddr:$src), (LHU xaddr:$src)>; + +def : Pat<(sextloadi1 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>; +def : Pat<(sextloadi8 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>; +def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>; +def : Pat<(sextloadi1 xaddr:$src), (SEXT8 (LBU xaddr:$src))>; +def : Pat<(sextloadi8 xaddr:$src), (SEXT8 (LBU xaddr:$src))>; +def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>; + +// peepholes +def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>; + +//===----------------------------------------------------------------------===// +// Floating Point Support +//===----------------------------------------------------------------------===// +include "MBlazeInstrFSL.td" +include "MBlazeInstrFPU.td" diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp new file mode 100644 index 0000000..c8faffc --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -0,0 +1,109 @@ +//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information -00-------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of TargetIntrinsicInfo. 
+// +//===----------------------------------------------------------------------===// + +#include "MBlazeIntrinsicInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> + +using namespace llvm; + +namespace mblazeIntrinsic { + + enum ID { + last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1, +#define GET_INTRINSIC_ENUM_VALUES +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , num_mblaze_intrinsics + }; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "MBlazeGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +} + +std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, + unsigned numTys) const { + static const char *const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); + if (IntrID < Intrinsic::num_intrinsics) + return 0; + assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics && + "Invalid intrinsic ID"); + + std::string Result(names[IntrID - Intrinsic::num_intrinsics]); + return Result; +} + +unsigned MBlazeIntrinsicInfo:: +lookupName(const char *Name, unsigned Len) const { +#define GET_FUNCTION_RECOGNIZER +#include "MBlazeGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + return 0; +} + +unsigned MBlazeIntrinsicInfo:: +lookupGCCName(const char *Name) const { + return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name); +} + +bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const { + // Overload Table + const bool OTable[] = { +#define GET_INTRINSIC_OVERLOAD_TABLE +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_OVERLOAD_TABLE + }; + if (IntrID == 0) + return false; + else + return OTable[IntrID - Intrinsic::num_intrinsics]; +} + +/// This defines the "getAttributes(ID id)" method. +#define GET_INTRINSIC_ATTRIBUTES +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_ATTRIBUTES + +static const FunctionType *getType(LLVMContext &Context, unsigned id) { + const Type *ResultTy = NULL; + std::vector<const Type*> ArgTys; + bool IsVarArg = false; + +#define GET_INTRINSIC_GENERATOR +#include "MBlazeGenIntrinsics.inc" +#undef GET_INTRINSIC_GENERATOR + + return FunctionType::get(ResultTy, ArgTys, IsVarArg); +} + +Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + const Type **Tys, + unsigned numTy) const { + assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded"); + AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID); + return cast<Function>(M->getOrInsertFunction(getName(IntrID), + getType(M->getContext(), IntrID), + AList)); +} diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h new file mode 100644 index 0000000..9804c77 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h @@ -0,0 +1,33 @@ +//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of TargetIntrinsicInfo. 
+// +//===----------------------------------------------------------------------===// +#ifndef MBLAZEINTRINSICS_H +#define MBLAZEINTRINSICS_H + +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { + + class MBlazeIntrinsicInfo : public TargetIntrinsicInfo { + public: + std::string getName(unsigned IntrID, const Type **Tys = 0, + unsigned numTys = 0) const; + unsigned lookupName(const char *Name, unsigned Len) const; + unsigned lookupGCCName(const char *Name) const; + bool isOverloaded(unsigned IID) const; + Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0, + unsigned numTys = 0) const; + }; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td new file mode 100644 index 0000000..76eb563 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeIntrinsics.td @@ -0,0 +1,137 @@ +//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the MicroBlaze-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Definitions for all MBlaze intrinsics. +// + +// MBlaze intrinsic classes. +let TargetPrefix = "mblaze", isTarget = 1 in { + class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty], + [IntrWriteMem]>; + + class MBFSL_Put_Intrinsic : Intrinsic<[llvm_void_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem]>; + + class MBFSL_PutT_Intrinsic : Intrinsic<[llvm_void_ty], + [llvm_i32_ty], + [IntrWriteMem]>; +} + +//===----------------------------------------------------------------------===// +// MicroBlaze FSL Get Intrinsic Definitions. 
+// + +def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">, + MBFSL_Get_Intrinsic; +def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">, + MBFSL_Get_Intrinsic; + +//===----------------------------------------------------------------------===// +// MicroBlaze FSL Put Intrinsic Definitions. 
+// + +def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">, + MBFSL_Put_Intrinsic; +def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">, + MBFSL_PutT_Intrinsic; +def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">, + MBFSL_PutT_Intrinsic; diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp new file mode 100644 index 0000000..7ae465d --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp @@ -0,0 +1,27 @@ +//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the MBlazeMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeMCAsmInfo.h" +using namespace llvm; + +MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) { + AlignmentIsInBytes = false; + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; + PrivateGlobalPrefix = "$"; + CommentString = "#"; + ZeroDirective = "\t.space\t"; + GPRel32Directive = "\t.gpword\t"; + HasSetDirective = false; +} diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h new file mode 100644 index 0000000..bccb418 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h @@ -0,0 +1,30 @@ +//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MBlazeMCAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZETARGETASMINFO_H +#define MBLAZETARGETASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + class MBlazeMCAsmInfo : public MCAsmInfo { + public: + explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h new file mode 100644 index 0000000..08d4dca --- /dev/null +++ b/lib/Target/MBlaze/MBlazeMachineFunction.h @@ -0,0 +1,136 @@ +//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H +#define MBLAZE_MACHINE_FUNCTION_INFO_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" + +namespace llvm { + +/// MBlazeFunctionInfo - This class is derived from MachineFunction private +/// MBlaze target-specific information for each MachineFunction. +class MBlazeFunctionInfo : public MachineFunctionInfo { + +private: + /// Holds for each function where on the stack the Frame Pointer must be + /// saved. This is used on Prologue and Epilogue to emit FP save/restore + int FPStackOffset; + + /// Holds for each function where on the stack the Return Address must be + /// saved. This is used on Prologue and Epilogue to emit RA save/restore + int RAStackOffset; + + /// At each function entry a special bitmask directive must be emitted + /// to help in debugging CPU callee saved registers. It needs a negative + /// offset from the final stack size and its higher register location on + /// the stack. + int CPUTopSavedRegOff; + + /// MBlazeFIHolder - Holds a FrameIndex and it's Stack Pointer Offset + struct MBlazeFIHolder { + + int FI; + int SPOffset; + + MBlazeFIHolder(int FrameIndex, int StackPointerOffset) + : FI(FrameIndex), SPOffset(StackPointerOffset) {} + }; + + /// When PIC is used the GP must be saved on the stack on the function + /// prologue and must be reloaded from this stack location after every + /// call. A reference to its stack location and frame index must be kept + /// to be used on emitPrologue and processFunctionBeforeFrameFinalized. + MBlazeFIHolder GPHolder; + + /// On LowerFormalArguments the stack size is unknown, so the Stack + /// Pointer Offset calculation of "not in register arguments" must be + /// postponed to emitPrologue. + SmallVector<MBlazeFIHolder, 16> FnLoadArgs; + bool HasLoadArgs; + + // When VarArgs, we must write registers back to caller stack, preserving + // on register arguments. Since the stack size is unknown on + // LowerFormalArguments, the Stack Pointer Offset calculation must be + // postponed to emitPrologue. + SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs; + bool HasStoreVarArgs; + + /// SRetReturnReg - Some subtargets require that sret lowering includes + /// returning the value of the returned struct in a register. 
This field + /// holds the virtual register into which the sret argument is passed. + unsigned SRetReturnReg; + + /// GlobalBaseReg - keeps track of the virtual register initialized for + /// use as the global base register. This is used for PIC in some PIC + /// relocation models. + unsigned GlobalBaseReg; + +public: + MBlazeFunctionInfo(MachineFunction& MF) + : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), + GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false), + SRetReturnReg(0), GlobalBaseReg(0) + {} + + int getFPStackOffset() const { return FPStackOffset; } + void setFPStackOffset(int Off) { FPStackOffset = Off; } + + int getRAStackOffset() const { return RAStackOffset; } + void setRAStackOffset(int Off) { RAStackOffset = Off; } + + int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; } + void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; } + + int getGPStackOffset() const { return GPHolder.SPOffset; } + int getGPFI() const { return GPHolder.FI; } + void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; } + void setGPFI(int FI) { GPHolder.FI = FI; } + bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; } + + bool hasLoadArgs() const { return HasLoadArgs; } + bool hasStoreVarArgs() const { return HasStoreVarArgs; } + + void recordLoadArgsFI(int FI, int SPOffset) { + if (!HasLoadArgs) HasLoadArgs=true; + FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset)); + } + void recordStoreVarArgsFI(int FI, int SPOffset) { + if (!HasStoreVarArgs) HasStoreVarArgs=true; + FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset)); + } + + void adjustLoadArgsFI(MachineFrameInfo *MFI) const { + if (!hasLoadArgs()) return; + for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i) + MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset ); + } + void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const { + if (!hasStoreVarArgs()) return; + for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i) + MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset ); + } + + unsigned getSRetReturnReg() const { return SRetReturnReg; } + void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } + + unsigned getGlobalBaseReg() const { return GlobalBaseReg; } + void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } +}; + +} // end of namespace llvm + +#endif // MBLAZE_MACHINE_FUNCTION_INFO_H diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp new file mode 100644 index 0000000..8dfca81 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -0,0 +1,421 @@ +//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetRegisterInfo +// class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-reg-info" + +#include "MBlaze.h" +#include "MBlazeSubtarget.h" +#include "MBlazeRegisterInfo.h" +#include "MBlazeMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/Type.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" + +using namespace llvm; + +MBlazeRegisterInfo:: +MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii) + : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP), + Subtarget(ST), TII(tii) {} + +/// getRegisterNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). +unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) { + switch (RegEnum) { + case MBlaze::R0 : case MBlaze::F0 : return 0; + case MBlaze::R1 : case MBlaze::F1 : return 1; + case MBlaze::R2 : case MBlaze::F2 : return 2; + case MBlaze::R3 : case MBlaze::F3 : return 3; + case MBlaze::R4 : case MBlaze::F4 : return 4; + case MBlaze::R5 : case MBlaze::F5 : return 5; + case MBlaze::R6 : case MBlaze::F6 : return 6; + case MBlaze::R7 : case MBlaze::F7 : return 7; + case MBlaze::R8 : case MBlaze::F8 : return 8; + case MBlaze::R9 : case MBlaze::F9 : return 9; + case MBlaze::R10 : case MBlaze::F10 : return 10; + case MBlaze::R11 : case MBlaze::F11 : return 11; + case MBlaze::R12 : case MBlaze::F12 : return 12; + case MBlaze::R13 : case MBlaze::F13 : return 13; + case MBlaze::R14 : case MBlaze::F14 : return 14; + case MBlaze::R15 : case MBlaze::F15 : return 15; + case MBlaze::R16 : case MBlaze::F16 : return 16; + case MBlaze::R17 : case MBlaze::F17 : return 17; + case MBlaze::R18 : case MBlaze::F18 : return 18; + case MBlaze::R19 : case MBlaze::F19 : return 19; + case MBlaze::R20 : case MBlaze::F20 : return 20; + case MBlaze::R21 : case MBlaze::F21 : return 21; + case MBlaze::R22 : case MBlaze::F22 : return 22; + case MBlaze::R23 : case MBlaze::F23 : return 23; + case MBlaze::R24 : case MBlaze::F24 : return 24; + case MBlaze::R25 : case MBlaze::F25 : return 25; + case MBlaze::R26 : case MBlaze::F26 : return 26; + case MBlaze::R27 : case MBlaze::F27 : return 27; + case MBlaze::R28 : case MBlaze::F28 : return 28; + case MBlaze::R29 : case MBlaze::F29 : return 29; + case MBlaze::R30 : case MBlaze::F30 : return 30; + case MBlaze::R31 : case MBlaze::F31 : return 31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +/// getRegisterFromNumbering - Given the enum value for some register, e.g. +/// MBlaze::R0, return the number that it corresponds to (e.g. 0). 
+unsigned MBlazeRegisterInfo::getRegisterFromNumbering(unsigned Reg) { + switch (Reg) { + case 0 : return MBlaze::R0; + case 1 : return MBlaze::R1; + case 2 : return MBlaze::R2; + case 3 : return MBlaze::R3; + case 4 : return MBlaze::R4; + case 5 : return MBlaze::R5; + case 6 : return MBlaze::R6; + case 7 : return MBlaze::R7; + case 8 : return MBlaze::R8; + case 9 : return MBlaze::R9; + case 10 : return MBlaze::R10; + case 11 : return MBlaze::R11; + case 12 : return MBlaze::R12; + case 13 : return MBlaze::R13; + case 14 : return MBlaze::R14; + case 15 : return MBlaze::R15; + case 16 : return MBlaze::R16; + case 17 : return MBlaze::R17; + case 18 : return MBlaze::R18; + case 19 : return MBlaze::R19; + case 20 : return MBlaze::R20; + case 21 : return MBlaze::R21; + case 22 : return MBlaze::R22; + case 23 : return MBlaze::R23; + case 24 : return MBlaze::R24; + case 25 : return MBlaze::R25; + case 26 : return MBlaze::R26; + case 27 : return MBlaze::R27; + case 28 : return MBlaze::R28; + case 29 : return MBlaze::R29; + case 30 : return MBlaze::R30; + case 31 : return MBlaze::R31; + default: llvm_unreachable("Unknown register number!"); + } + return 0; // Not reached +} + +unsigned MBlazeRegisterInfo::getPICCallReg() { + return MBlaze::R20; +} + +//===----------------------------------------------------------------------===// +// Callee Saved Registers methods +//===----------------------------------------------------------------------===// + +/// MBlaze Callee Saved Registers +const unsigned* MBlazeRegisterInfo:: +getCalleeSavedRegs(const MachineFunction *MF) const { + // MBlaze callee-save register range is R20 - R31 + static const unsigned CalleeSavedRegs[] = { + MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23, + MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27, + MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31, + 0 + }; + + return CalleeSavedRegs; +} + +/// MBlaze Callee Saved Register Classes +const TargetRegisterClass* const* MBlazeRegisterInfo:: +getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRC[] = { + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, + 0 + }; + + return CalleeSavedRC; +} + +BitVector MBlazeRegisterInfo:: +getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + Reserved.set(MBlaze::R0); + Reserved.set(MBlaze::R1); + Reserved.set(MBlaze::R2); + Reserved.set(MBlaze::R13); + Reserved.set(MBlaze::R14); + Reserved.set(MBlaze::R15); + Reserved.set(MBlaze::R16); + Reserved.set(MBlaze::R17); + Reserved.set(MBlaze::R18); + Reserved.set(MBlaze::R19); + return Reserved; +} + +//===----------------------------------------------------------------------===// +// +// Stack Frame Processing methods +// +----------------------------+ +// +// The stack is allocated decrementing the stack pointer on +// the first instruction of a function prologue. Once decremented, +// all stack references are are done through a positive offset +// from the stack/frame pointer, so the stack is considered +// to grow up. 
+//
+//===----------------------------------------------------------------------===//
+
+void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+  // See the description at MicroBlazeMachineFunction.h
+  int TopCPUSavedRegOff = -1;
+
+  // Adjust CPU Callee Saved Registers Area. Registers RA and FP must
+  // be saved in this CPU Area when needed. This whole Area must
+  // be aligned to the default Stack Alignment requirements.
+  unsigned StackOffset = MFI->getStackSize();
+  unsigned RegSize = 4;
+
+  // Replace the dummy '0' SPOffset by the negative offsets, as explained in
+  // LowerFORMAL_ARGUMENTS. Leaving '0' for a while is necessary to avoid
+  // the approach done by calculateFrameObjectOffsets to the stack frame.
+  MBlazeFI->adjustLoadArgsFI(MFI);
+  MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+  if (hasFP(MF)) {
+    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+                         StackOffset);
+    MBlazeFI->setFPStackOffset(StackOffset);
+    TopCPUSavedRegOff = StackOffset;
+    StackOffset += RegSize;
+  }
+
+  if (MFI->hasCalls()) {
+    MBlazeFI->setRAStackOffset(0);
+    MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+                         StackOffset);
+    TopCPUSavedRegOff = StackOffset;
+    StackOffset += RegSize;
+  }
+
+  // Update frame info
+  MFI->setStackSize(StackOffset);
+
+  // Recalculate the final tops offset. The final values must be '0'
+  // if there isn't a callee saved register for CPU or FPU, otherwise
+  // a negative offset is needed.
+  if (TopCPUSavedRegOff >= 0)
+    MBlazeFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+// This function eliminates the ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MBlazeRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+  MBB.erase(I);
+}
+
+// FrameIndex represents objects inside an abstract stack.
+// We must replace FrameIndex with a direct stack/frame pointer
+// reference.
+unsigned MBlazeRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                    int *Value, RegScavenger *RS) const {
+  MachineInstr &MI = *II;
+  MachineFunction &MF = *MI.getParent()->getParent();
+
+  unsigned i = 0;
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned oi = i == 2 ? 1 : 2;
+
+  DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+        errs() << "<--------->\n" << MI);
+
+  int FrameIndex = MI.getOperand(i).getIndex();
+  int stackSize = MF.getFrameInfo()->getStackSize();
+  int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+  DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+               << "spOffset : " << spOffset << "\n"
+               << "stackSize : " << stackSize << "\n");
+
+  // As explained in LowerFormalArguments, detect negative offsets
+  // and adjust SPOffsets considering the final stack size.
+  int Offset = (spOffset < 0) ?
(stackSize - spOffset) : (spOffset + 4); + Offset += MI.getOperand(oi).getImm(); + + DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); + + MI.getOperand(oi).ChangeToImmediate(Offset); + MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + return 0; +} + +void MBlazeRegisterInfo:: +emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc dl = (MBBI != MBB.end() ? + MBBI->getDebugLoc() : DebugLoc::getUnknownLoc()); + + // Get the right frame order for MBlaze. + adjustMBlazeStackFrame(MF); + + // Get the number of bytes to allocate from the FrameInfo. + unsigned StackSize = MFI->getStackSize(); + + // No need to allocate space on the stack. + if (StackSize == 0 && !MFI->hasCalls()) return; + if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + + int FPOffset = MBlazeFI->getFPStackOffset(); + int RAOffset = MBlazeFI->getRAStackOffset(); + + // Adjust stack : addi R1, R1, -imm + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1) + .addReg(MBlaze::R1).addImm(-StackSize); + + // Save the return address only if the function isnt a leaf one. + // swi R15, R1, stack_loc + if (MFI->hasCalls()) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI)) + .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1); + } + + // if framepointer enabled, save it and set it + // to point to the stack pointer + if (hasFP(MF)) { + // swi R19, R1, stack_loc + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI)) + .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1); + + // add R19, R1, R0 + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R19) + .addReg(MBlaze::R1).addReg(MBlaze::R0); + } +} + +void MBlazeRegisterInfo:: +emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + DebugLoc dl = MBBI->getDebugLoc(); + + // Get the FI's where RA and FP are saved. + int FPOffset = MBlazeFI->getFPStackOffset(); + int RAOffset = MBlazeFI->getRAStackOffset(); + + // if framepointer enabled, restore it and restore the + // stack pointer + if (hasFP(MF)) { + // add R1, R19, R0 + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1) + .addReg(MBlaze::R19).addReg(MBlaze::R0); + + // lwi R19, R1, stack_loc + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19) + .addImm(FPOffset).addReg(MBlaze::R1); + } + + // Restore the return address only if the function isnt a leaf one. + // lwi R15, R1, stack_loc + if (MFI->hasCalls()) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15) + .addImm(RAOffset).addReg(MBlaze::R1); + } + + // Get the number of bytes from FrameInfo + int StackSize = (int) MFI->getStackSize(); + if (StackSize < 28 && MFI->hasCalls()) StackSize = 28; + + // adjust stack. + // addi R1, R1, imm + if (StackSize) { + BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1) + .addReg(MBlaze::R1).addImm(StackSize); + } +} + + +void MBlazeRegisterInfo:: +processFunctionBeforeFrameFinalized(MachineFunction &MF) const { + // Set the stack offset where GP must be saved/loaded from. 
+ MachineFrameInfo *MFI = MF.getFrameInfo(); + MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); + if (MBlazeFI->needGPSaveRestore()) + MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset()); +} + +unsigned MBlazeRegisterInfo::getRARegister() const { + return MBlaze::R15; +} + +unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return hasFP(MF) ? MBlaze::R19 : MBlaze::R1; +} + +unsigned MBlazeRegisterInfo::getEHExceptionRegister() const { + llvm_unreachable("What is the exception register"); + return 0; +} + +unsigned MBlazeRegisterInfo::getEHHandlerRegister() const { + llvm_unreachable("What is the exception handler register"); + return 0; +} + +int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + llvm_unreachable("What is the dwarf register number"); + return -1; +} + +#include "MBlazeGenRegisterInfo.inc" + diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h new file mode 100644 index 0000000..cde7d39 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -0,0 +1,96 @@ +//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the MBlaze implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZEREGISTERINFO_H +#define MBLAZEREGISTERINFO_H + +#include "MBlaze.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "MBlazeGenRegisterInfo.h.inc" + +namespace llvm { +class MBlazeSubtarget; +class TargetInstrInfo; +class Type; + +namespace MBlaze { + /// SubregIndex - The index of various sized subregister classes. Note that + /// these indices must be kept in sync with the class indices in the + /// MBlazeRegisterInfo.td file. + enum SubregIndex { + SUBREG_FPEVEN = 1, SUBREG_FPODD = 2 + }; +} + +struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { + const MBlazeSubtarget &Subtarget; + const TargetInstrInfo &TII; + + MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget, + const TargetInstrInfo &tii); + + /// getRegisterNumbering - Given the enum value for some register, e.g. + /// MBlaze::RA, return the number that it corresponds to (e.g. 31). + static unsigned getRegisterNumbering(unsigned RegEnum); + static unsigned getRegisterFromNumbering(unsigned RegEnum); + + /// Get PIC indirect call register + static unsigned getPICCallReg(); + + /// Adjust the MBlaze stack frame. + void adjustMBlazeStackFrame(MachineFunction &MF) const; + + /// Code Generation virtual methods... 
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction* MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + /// Stack Frame Processing Methods + unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, int *Value = NULL, + RegScavenger *RS = NULL) const; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + /// Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const; + + /// Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const { return true; } + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td new file mode 100644 index 0000000..96a5c98 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -0,0 +1,186 @@ +//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the MicroBlaze register file +//===----------------------------------------------------------------------===// + +// We have banks of 32 registers each. 
+class MBlazeReg<string n> : Register<n> { + field bits<5> Num; + let Namespace = "MBlaze"; +} + +class MBlazeRegWithSubRegs<string n, list<Register> subregs> + : RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + let Namespace = "MBlaze"; +} + +// MBlaze CPU Registers +class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> { + let Num = num; +} + +// MBlaze 32-bit (aliased) FPU Registers +class FPR<bits<5> num, string n, list<Register> subregs> + : MBlazeRegWithSubRegs<n, subregs> { + let Num = num; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +let Namespace = "MBlaze" in { + + // General Purpose Registers + def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>; + def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>; + def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>; + def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>; + def R4 : MBlazeGPRReg< 4, "r4">, DwarfRegNum<[5]>; + def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>; + def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>; + def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>; + def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>; + def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>; + def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>; + def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>; + def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>; + def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>; + def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>; + def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>; + def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>; + def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>; + def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>; + def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>; + def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>; + def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>; + def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>; + def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>; + def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>; + def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>; + def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>; + def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>; + def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>; + def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>; + def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>; + def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>; + + /// MBlaze Single point precision FPU Registers + def F0 : FPR< 0, "r0", [R0]>, DwarfRegNum<[32]>; + def F1 : FPR< 1, "r1", [R1]>, DwarfRegNum<[33]>; + def F2 : FPR< 2, "r2", [R2]>, DwarfRegNum<[34]>; + def F3 : FPR< 3, "r3", [R3]>, DwarfRegNum<[35]>; + def F4 : FPR< 4, "r4", [R4]>, DwarfRegNum<[36]>; + def F5 : FPR< 5, "r5", [R5]>, DwarfRegNum<[37]>; + def F6 : FPR< 6, "r6", [R6]>, DwarfRegNum<[38]>; + def F7 : FPR< 7, "r7", [R7]>, DwarfRegNum<[39]>; + def F8 : FPR< 8, "r8", [R8]>, DwarfRegNum<[40]>; + def F9 : FPR< 9, "r9", [R9]>, DwarfRegNum<[41]>; + def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>; + def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>; + def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>; + def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>; + def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>; + def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>; + def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>; + def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>; + def F18 : 
FPR<18, "r18", [R18]>, DwarfRegNum<[50]>; + def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>; + def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>; + def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>; + def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>; + def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>; + def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>; + def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>; + def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>; + def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>; + def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>; + def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>; + def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>; + def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +def CPURegs : RegisterClass<"MBlaze", [i32], 32, + [ + // Return Values and Arguments + R3, R4, R5, R6, R7, R8, R9, R10, + + // Not preserved across procedure calls + R11, R12, + + // Callee save + R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + + // Reserved + R0, // Always zero + R1, // The stack pointer + R2, // Read-only small data area anchor + R13, // Read-write small data area anchor + R14, // Return address for interrupts + R15, // Return address for sub-routines + R16, // Return address for trap + R17, // Return address for exceptions + R18, // Reserved for assembler + R19 // The frame-pointer + ]> +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + CPURegsClass::iterator + CPURegsClass::allocation_order_end(const MachineFunction &MF) const { + // The last 10 registers on the list above are reserved + return end()-10; + } + }]; +} + +def FGR32 : RegisterClass<"MBlaze", [f32], 32, + [ + // Return Values and Arguments + F3, F4, F5, F6, F7, F8, F9, F10, + + // Not preserved across procedure calls + F11, F12, + + // Callee save + F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31, + + // Reserved + F0, // Always zero + F1, // The stack pointer + F2, // Read-only small data area anchor + F13, // Read-write small data area anchor + F14, // Return address for interrupts + F15, // Return address for sub-routines + F16, // Return address for trap + F17, // Return address for exceptions + F18, // Reserved for assembler + F19 // The frame pointer + ]> +{ + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + FGR32Class::iterator + FGR32Class::allocation_order_end(const MachineFunction &MF) const { + // The last 10 registers on the list above are reserved + return end()-10; + } + }]; +} diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td new file mode 100644 index 0000000..6a94491 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -0,0 +1,63 @@ +//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Functional units across MBlaze chips sets. Based on GCC/MBlaze backend files. 
+//===----------------------------------------------------------------------===// +def ALU : FuncUnit; +def IMULDIV : FuncUnit; + +//===----------------------------------------------------------------------===// +// Instruction Itinerary classes used for MBlaze +//===----------------------------------------------------------------------===// +def IIAlu : InstrItinClass; +def IILoad : InstrItinClass; +def IIStore : InstrItinClass; +def IIXfer : InstrItinClass; +def IIBranch : InstrItinClass; +def IIHiLo : InstrItinClass; +def IIImul : InstrItinClass; +def IIIdiv : InstrItinClass; +def IIFcvt : InstrItinClass; +def IIFmove : InstrItinClass; +def IIFcmp : InstrItinClass; +def IIFadd : InstrItinClass; +def IIFmulSingle : InstrItinClass; +def IIFmulDouble : InstrItinClass; +def IIFdivSingle : InstrItinClass; +def IIFdivDouble : InstrItinClass; +def IIFsqrtSingle : InstrItinClass; +def IIFsqrtDouble : InstrItinClass; +def IIFrecipFsqrtStep : InstrItinClass; +def IIPseudo : InstrItinClass; + +//===----------------------------------------------------------------------===// +// MBlaze Generic instruction itineraries. +//===----------------------------------------------------------------------===// +def MBlazeGenericItineraries : ProcessorItineraries<[ + InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, + InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, + InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, + InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>, + InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>, + InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>, + InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>, + InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>, + InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>, + InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>, + InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>, + InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>, + InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>, + InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>, + InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>, + InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>, + InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>, + InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>, + InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]> +]>; diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp new file mode 100644 index 0000000..3440521 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -0,0 +1,31 @@ +//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MBlaze specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#include "MBlazeSubtarget.h" +#include "MBlaze.h" +#include "MBlazeGenSubtarget.inc" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS): + HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false), + HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false), + HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false), + HasMul64(false), HasSqrt(false), HasMMU(false) +{ + std::string CPU = "v400"; + MBlazeArchVersion = V400; + + // Parse features string. + ParseSubtargetFeatures(FS, CPU); +} diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h new file mode 100644 index 0000000..bebb3f7 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSubtarget.h @@ -0,0 +1,79 @@ +//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZESUBTARGET_H +#define MBLAZESUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetMachine.h" + +#include <string> + +namespace llvm { + +class MBlazeSubtarget : public TargetSubtarget { + +protected: + + enum MBlazeArchEnum { + V400, V500, V600, V700, V710 + }; + + // MBlaze architecture version + MBlazeArchEnum MBlazeArchVersion; + + bool HasPipe3; + bool HasBarrel; + bool HasDiv; + bool HasMul; + bool HasFSL; + bool HasEFSL; + bool HasMSRSet; + bool HasException; + bool HasPatCmp; + bool HasFPU; + bool HasESR; + bool HasPVR; + bool HasMul64; + bool HasSqrt; + bool HasMMU; + + InstrItineraryData InstrItins; + +public: + + /// This constructor initializes the data members to match that + /// of the specified triple. + MBlazeSubtarget(const std::string &TT, const std::string &FS); + + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); + + bool hasFPU() const { return HasFPU; } + bool hasSqrt() const { return HasSqrt; } + bool hasMul() const { return HasMul; } + bool hasMul64() const { return HasMul64; } + bool hasDiv() const { return HasDiv; } + bool hasBarrel() const { return HasBarrel; } + + bool isV400() const { return MBlazeArchVersion == V400; } + bool isV500() const { return MBlazeArchVersion == V500; } + bool isV600() const { return MBlazeArchVersion == V600; } + bool isV700() const { return MBlazeArchVersion == V700; } + bool isV710() const { return MBlazeArchVersion == V710; } +}; +} // End llvm namespace + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp new file mode 100644 index 0000000..9eba2b3 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -0,0 +1,66 @@ +//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Implements the info about MBlaze target spec. +// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "MBlazeMCAsmInfo.h" +#include "MBlazeTargetMachine.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +extern "C" void LLVMInitializeMBlazeTarget() { + // Register the target. + RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget); + RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget); +} + +// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment +// The stack is always 8 byte aligned +// On function prologue, the stack is created by decrementing +// its pointer. Once decremented, all references are done with positive +// offset from the stack/frame pointer, using StackGrowsUp enables +// an easier handling. +MBlazeTargetMachine:: +MBlazeTargetMachine(const Target &T, const std::string &TT, + const std::string &FS): + LLVMTargetMachine(T, TT), + Subtarget(TT, FS), + DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-" + "f64:32:32-v64:32:32-v128:32:32-n32"), + InstrInfo(*this), + FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), + TLInfo(*this) { + if (getRelocationModel() == Reloc::Default) { + setRelocationModel(Reloc::Static); + } + + if (getCodeModel() == CodeModel::Default) + setCodeModel(CodeModel::Small); +} + +// Install an instruction selector pass using +// the ISelDag to gen MBlaze code. +bool MBlazeTargetMachine:: +addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createMBlazeISelDag(*this)); + return false; +} + +// Implemented by targets that want to run passes immediately before +// machine code is emitted. return true if -print-machineinstrs should +// print out the code after the passes. +bool MBlazeTargetMachine:: +addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createMBlazeDelaySlotFillerPass(*this)); + return true; +} diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h new file mode 100644 index 0000000..85c975c --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -0,0 +1,69 @@ +//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the MBlaze specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZE_TARGETMACHINE_H +#define MBLAZE_TARGETMACHINE_H + +#include "MBlazeSubtarget.h" +#include "MBlazeInstrInfo.h" +#include "MBlazeISelLowering.h" +#include "MBlazeIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" + +namespace llvm { + class formatted_raw_ostream; + + class MBlazeTargetMachine : public LLVMTargetMachine { + MBlazeSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + MBlazeInstrInfo InstrInfo; + TargetFrameInfo FrameInfo; + MBlazeTargetLowering TLInfo; + MBlazeIntrinsicInfo IntrinsicInfo; + public: + MBlazeTargetMachine(const Target &T, const std::string &TT, + const std::string &FS); + + virtual const MBlazeInstrInfo *getInstrInfo() const + { return &InstrInfo; } + + virtual const TargetFrameInfo *getFrameInfo() const + { return &FrameInfo; } + + virtual const MBlazeSubtarget *getSubtargetImpl() const + { return &Subtarget; } + + virtual const TargetData *getTargetData() const + { return &DataLayout;} + + virtual const MBlazeRegisterInfo *getRegisterInfo() const + { return &InstrInfo.getRegisterInfo(); } + + virtual MBlazeTargetLowering *getTargetLowering() const + { return const_cast<MBlazeTargetLowering*>(&TLInfo); } + + const TargetIntrinsicInfo *getIntrinsicInfo() const + { return &IntrinsicInfo; } + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + + virtual bool addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + }; +} // End llvm namespace + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp new file mode 100644 index 0000000..79c9494 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -0,0 +1,88 @@ +//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MBlazeTargetObjectFile.h" +#include "MBlazeSubtarget.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +void MBlazeTargetObjectFile:: +Initialize(MCContext &Ctx, const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + SmallDataSection = + getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + + SmallBSSSection = + getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); + +} + +// A address must be loaded from a small section if its size is less than the +// small section size threshold. Data in this section must be addressed using +// gp_rel operator. 
+static bool IsInSmallSection(uint64_t Size) { + return Size > 0 && Size <= 8; +} + +bool MBlazeTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const { + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return false; + + return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM)); +} + +/// IsGlobalInSmallSection - Return true if this global address should be +/// placed into small data/bss section. +bool MBlazeTargetObjectFile:: +IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, + SectionKind Kind) const { + // Only global variables, not functions. + const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); + if (!GVA) + return false; + + // We can only do this for datarel or BSS objects for now. + if (!Kind.isBSS() && !Kind.isDataRel()) + return false; + + // If this is a internal constant string, there is a special + // section for it, but not in small data/bss. + if (Kind.isMergeable1ByteCString()) + return false; + + const Type *Ty = GV->getType()->getElementType(); + return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty)); +} + +const MCSection *MBlazeTargetObjectFile:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*" + // sections? + + // Handle Small Section classification here. + if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallBSSSection; + if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + return SmallDataSection; + + // Otherwise, we work the same as ELF. + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM); +} diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/lib/Target/MBlaze/MBlazeTargetObjectFile.h new file mode 100644 index 0000000..20e7702 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.h @@ -0,0 +1,41 @@ +//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H +#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + + class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF { + const MCSection *SmallDataSection; + const MCSection *SmallBSSSection; + public: + + void Initialize(MCContext &Ctx, const TargetMachine &TM); + + + /// IsGlobalInSmallSection - Return true if this global address should be + /// placed into small data/bss section. 
+ bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM, + SectionKind Kind) const; + + bool IsGlobalInSmallSection(const GlobalValue *GV, + const TargetMachine &TM) const; + + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler *Mang, + const TargetMachine &TM) const; + }; +} // end namespace llvm + +#endif diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile new file mode 100644 index 0000000..19e508c --- /dev/null +++ b/lib/Target/MBlaze/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/MBlaze/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. +LIBRARYNAME = LLVMMBlazeCodeGen +TARGET = MBlaze + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \ + MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \ + MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \ + MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \ + MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc + +DIRS = AsmPrinter TargetInfo + +include $(LEVEL)/Makefile.common + diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..5afb14d --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMMBlazeInfo + MBlazeTargetInfo.cpp + ) + +add_dependencies(LLVMMBlazeInfo MBlazeCodeGenTable_gen) diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp new file mode 100644 index 0000000..16e01db --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp @@ -0,0 +1,19 @@ +//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MBlaze.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +Target llvm::TheMBlazeTarget; + +extern "C" void LLVMInitializeMBlazeTargetInfo() { + RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze"); +} diff --git a/lib/Target/MBlaze/TargetInfo/Makefile b/lib/Target/MBlaze/TargetInfo/Makefile new file mode 100644 index 0000000..fb7ea11 --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/MBlaze/TargetInfo/Makefile ---------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMMBlazeInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index a96ee49..ac41cc8 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -38,7 +38,8 @@ namespace llvm { virtual bool addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &Out, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel); + CodeGenOpt::Level OptLevel, + bool DisableVerify); virtual const TargetData *getTargetData() const { return 0; } }; @@ -57,7 +58,7 @@ bool MSILModule::runOnModule(Module &M) { TypeSymbolTable& Table = M.getTypeSymbolTable(); std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes(); for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) { - if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second)) + if (!I->second->isStructTy() && !I->second->isOpaqueTy()) Table.remove(I++); else { std::set<const Type *>::iterator T = Types.find(I->second); @@ -187,7 +188,7 @@ void MSILWriter::printModuleStartup() { break; case 1: Arg1 = F->arg_begin(); - if (Arg1->getType()->isInteger()) { + if (Arg1->getType()->isIntegerTy()) { Out << "\tldloc\targc\n"; Args = getTypeName(Arg1->getType()); BadSig = false; @@ -195,7 +196,7 @@ void MSILWriter::printModuleStartup() { break; case 2: Arg1 = Arg2 = F->arg_begin(); ++Arg2; - if (Arg1->getType()->isInteger() && + if (Arg1->getType()->isIntegerTy() && Arg2->getType()->getTypeID() == Type::PointerTyID) { Out << "\tldloc\targc\n\tldloc\targv\n"; Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType()); @@ -207,7 +208,7 @@ void MSILWriter::printModuleStartup() { } bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID); - if (BadSig || (!F->getReturnType()->isInteger() && !RetVoid)) { + if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) { Out << "\tldc.i4.0\n"; } else { Out << "\tcall\t" << getTypeName(F->getReturnType()) << @@ -334,7 +335,7 @@ std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) { std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned, bool isNested) { - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return getPrimitiveTypeName(Ty,isSigned); // FIXME: "OpaqueType" support switch (Ty->getTypeID()) { @@ -1459,7 +1460,7 @@ void MSILWriter::printDeclarations(const TypeSymbolTable& ST) { for (std::set<const Type*>::const_iterator UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) { const Type* Ty = *UI; - if (isa<ArrayType>(Ty) || isa<VectorType>(Ty) || isa<StructType>(Ty)) + if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy()) Name = getTypeName(Ty, false, true); // Type with no need to declare. 
else continue; @@ -1688,7 +1689,8 @@ void MSILWriter::printExternals() { bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM, formatted_raw_ostream &o, CodeGenFileType FileType, - CodeGenOpt::Level OptLevel) + CodeGenOpt::Level OptLevel, + bool DisableVerify) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; MSILWriter* Writer = new MSILWriter(o); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp index def5fc6..7a35eb0 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -98,12 +98,19 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, bool isMemOp = Modifier && !strcmp(Modifier, "mem"); uint64_t Offset = MO.getOffset(); - O << (isMemOp ? '&' : '#'); + // If the global address expression is a part of displacement field with a + // register base, we should not emit any prefix symbol here, e.g. + // mov.w &foo, r1 + // vs + // mov.w glb(r1), r2 + // Otherwise (!) msp430-as will silently miscompile the output :( + if (!Modifier || strcmp(Modifier, "nohash")) + O << (isMemOp ? '&' : '#'); if (Offset) O << '(' << Offset << '+'; O << *GetGlobalValueSymbol(MO.getGlobal()); - + if (Offset) O << ')'; @@ -124,15 +131,11 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, const MachineOperand &Disp = MI->getOperand(OpNum+1); // Print displacement first - if (!Disp.isImm()) { - printOperand(MI, OpNum+1, "mem"); - } else { - if (!Base.getReg()) - O << '&'; - - printOperand(MI, OpNum+1, "nohash"); - } + // Imm here is in fact global address - print extra modifier. + if (Disp.isImm() && !Base.getReg()) + O << '&'; + printOperand(MI, OpNum+1, "nohash"); // Print register base field if (Base.getReg()) { diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp index f6565bd..d7636e6 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp @@ -62,21 +62,26 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Disp = MI->getOperand(OpNo+1); // Print displacement first - if (Disp.isExpr()) { - O << '&' << *Disp.getExpr(); - } else { - assert(Disp.isImm() && "Expected immediate in displacement field"); - if (!Base.getReg()) - O << '&'; + // If the global address expression is a part of displacement field with a + // register base, we should not emit any prefix symbol here, e.g. + // mov.w &foo, r1 + // vs + // mov.w glb(r1), r2 + // Otherwise (!) 
msp430-as will silently miscompile the output :( + if (!Base.getReg()) + O << '&'; + + if (Disp.isExpr()) + O << *Disp.getExpr(); + else { + assert(Disp.isImm() && "Expected immediate in displacement field"); O << Disp.getImm(); } - // Print register base field - if (Base.getReg()) { + if (Base.getReg()) O << '(' << getRegisterName(Base.getReg()) << ')'; - } } void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo) { diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4eec757..911cfcb 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -26,26 +26,12 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" - using namespace llvm; -#ifndef NDEBUG -static cl::opt<bool> -ViewRMWDAGs("view-msp430-rmw-dags", cl::Hidden, - cl::desc("Pop up a window to show isel dags after RMW preprocess")); -#else -static const bool ViewRMWDAGs = false; -#endif - -STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); - - namespace { struct MSP430ISelAddressMode { enum { @@ -123,8 +109,6 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "MSP430 DAG->DAG Pattern Instruction Selection"; } @@ -133,9 +117,6 @@ namespace { bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const; - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); @@ -144,18 +125,12 @@ namespace { #include "MSP430GenDAGISel.inc" private: - DenseMap<SDNode*, SDNode*> RMWStores; - void PreprocessForRMW(); SDNode *Select(SDNode *N); SDNode *SelectIndexedLoad(SDNode *Op); SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8, unsigned Opc16); bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp); - - #ifndef NDEBUG - unsigned Indent; - #endif }; } // end anonymous namespace @@ -217,10 +192,7 @@ bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) } bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { - DEBUG({ - errs() << "MatchAddress: "; - AM.dump(); - }); + DEBUG(errs() << "MatchAddress: "; AM.dump()); switch (N.getOpcode()) { default: break; @@ -336,270 +308,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, return false; } -bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { - if (OptLevel == CodeGenOpt::None) return false; - - /// RMW preprocessing creates the following code: - /// [Load1] - /// ^ ^ - /// / | - /// / | - /// [Load2] | - /// ^ ^ | - /// | | | - /// | \-| - /// | | - /// | [Op] - /// | ^ - /// | | - /// \ / - /// \ / - /// [Store] - /// - /// The path Store => Load2 => Load1 is via chain. Note that in general it is - /// not allowed to fold Load1 into Op (and Store) since it will creates a - /// cycle. However, this is perfectly legal for the loads moved below the - /// TokenFactor by PreprocessForRMW. 
Query the map Store => Load1 (created - /// during preprocessing) to determine whether it's legal to introduce such - /// "cycle" for a moment. - DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root); - if (I != RMWStores.end() && I->second == N) - return true; - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); -} - - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// MoveBelowTokenFactor2 - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. This a version which sinks two loads below token factor. -/// Look into PreprocessForRMW comments for explanation of transform. -static void MoveBelowTokenFactor2(SelectionDAG *CurDAG, - SDValue Load1, SDValue Load2, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) { - SDNode* N = TF.getOperand(i).getNode(); - if (Load2.getNode() == N) - Ops.push_back(Load2.getOperand(0)); - else if (Load1.getNode() != N) - Ops.push_back(TF.getOperand(i)); - } - - SDValue NewTF = SDValue(CurDAG->MorphNodeTo(TF.getNode(), - TF.getOpcode(), - TF.getNode()->getVTList(), - &Ops[0], Ops.size()), TF.getResNo()); - SDValue NewLoad2 = CurDAG->UpdateNodeOperands(Load2, NewTF, - Load2.getOperand(1), - Load2.getOperand(2)); - - SDValue NewLoad1 = CurDAG->UpdateNodeOperands(Load1, NewLoad2.getValue(1), - Load1.getOperand(1), - Load1.getOperand(2)); - - CurDAG->UpdateNodeOperands(Store, - NewLoad1.getValue(1), - Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isAllowedToSink - return true if N a load which can be moved below token -/// factor. Basically, the load should be non-volatile and has single use. -static bool isLoadAllowedToSink(SDValue N, SDValue Chain) { - if (N.getOpcode() == ISD::BIT_CONVERT) - N = N.getOperand(0); - - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - return (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - LD->isOperandOf(Chain.getNode())); -} - - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. -/// The chain produced by the load must only be used by the store's chain -/// operand, otherwise this may produce a cycle in the DAG. 
-static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (isLoadAllowedToSink(N, Chain) && - N.getOperand(1) == Address) { - Load = N; - return true; - } - return false; -} - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// We also recognize the case where second operand of Op is load as well and -/// move it below token factor as well creating DAG as follows: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load1] -/// ^ ^ -/// / | -/// / | -/// [Load2] | -/// ^ ^ | -/// | | | -/// | \-| -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// This allows selection of mem-mem instructions. Yay! - -void MSP430DAGToDAGISel::PreprocessForRMW() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - unsigned RModW = 0; - SDValue Load1, Load2; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - if (isRMWLoad(N10, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N11, Chain)) { - Load2 = N11; - RModW = 2; - } else - RModW = 1; - } else if (isRMWLoad(N11, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N10, Chain)) { - Load2 = N10; - RModW = 2; - } else - RModW = 1; - } - break; - } - case ISD::SUB: - case ISD::SUBC: - case ISD::SUBE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - if (isRMWLoad(N10, Chain, N2, Load1)) { - if (isLoadAllowedToSink(N11, Chain)) { - Load2 = N11; - RModW = 2; - } else - RModW = 1; - } - break; - } - } - - NumLoadMoved += RModW; - if (RModW == 1) - MoveBelowTokenFactor(CurDAG, Load1, SDValue(I, 0), Chain); - else if (RModW == 2) { - MoveBelowTokenFactor2(CurDAG, Load1, Load2, SDValue(I, 0), Chain); - SDNode* Store = I; - RMWStores[Store] = Load2.getNode(); - } - } -} - - static bool isValidIndexedLoad(const LoadSDNode *LD) { ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD) @@ -656,7 +364,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalAndProfitableToFold(N1.getNode(), Op, Op)) { + IsLegalToFold(N1, Op, Op)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; @@ -682,46 +390,19 @@ SDNode 
*MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void MSP430DAGToDAGISel::InstructionSelect() { - std::string BlockName; - if (ViewRMWDAGs) - BlockName = MF->getFunction()->getNameStr() + ":" + - BB->getBasicBlock()->getNameStr(); - - PreprocessForRMW(); - - if (ViewRMWDAGs) CurDAG->viewGraph("RMW preprocessed:" + BlockName); - - DEBUG(errs() << "Selection DAG after RMW preprocessing:\n"); - DEBUG(CurDAG->dump()); - - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - SelectRoot(*CurDAG); - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); - RMWStores.clear(); -} - SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "); + DEBUG(errs() << "Selecting: "); DEBUG(Node->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent += 2); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); - DEBUG(Indent -= 2); return NULL; } @@ -809,13 +490,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs() << std::string(Indent-2, ' ') << "=> "); + DEBUG(errs() << "=> "); if (ResNode == NULL || ResNode == Node) DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent -= 2); return ResNode; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index ef81f51..e6c7e1e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -31,8 +31,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -371,7 +371,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } @@ -500,7 +501,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, PseudoSourceValue::getStack(), - VA.getLocMemOffset())); + VA.getLocMemOffset(), false, false, 0)); } } @@ -794,18 +795,15 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { if (andCC) { // C = ~Z, thus Res = SRW & 1, no processing is required } else { - // Res = (SRW >> 1) & 1 + // Res = ~((SRW >> 1) & 1) Shift = true; + Invert = true; } break; case MSP430CC::COND_E: - if (andCC) { - // C = ~Z, thus Res = ~(SRW & 1) - } else { - // Res = ~((SRW >> 1) & 1) - Shift = true; - } - Invert = true; + Shift = true; + // C = ~Z for AND instruction, thus we can put Res = ~(SRW & 1), however, + // Res = (SRW >> 1) & 1 is 1 word shorter. 
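     // Illustrative sketch, assuming the MSP430 status-register layout with
     // the carry flag C in bit 0 and the zero flag Z in bit 1:
     //   SRW = ...10b  (Z = 1)  ->  (SRW >> 1) & 1 == 1, i.e. "equal" holds
     //   SRW = ...00b  (Z = 0)  ->  (SRW >> 1) & 1 == 0, i.e. "equal" fails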
break; } EVT VT = Op.getValueType(); @@ -893,13 +891,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -911,7 +909,8 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::FPW, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -971,7 +970,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { bool MSP430TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; return (Ty1->getPrimitiveSizeInBits() > Ty2->getPrimitiveSizeInBits()); @@ -986,7 +985,7 @@ bool MSP430TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { bool MSP430TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // MSP430 implicitly zero-extends 8-bit results in 16-bit registers. - return 0 && Ty1->isInteger(8) && Ty2->isInteger(16); + return 0 && Ty1->isIntegerTy(8) && Ty2->isIntegerTy(16); } bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index bb06f7b..144ba26 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -250,7 +250,7 @@ def MOV16ri : I16ri<0x0, [(set GR16:$dst, imm:$src)]>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I8rm<0x0, (outs GR8:$dst), (ins memsrc:$src), "mov.b\t{$src, $dst}", diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index f1d4a67..c4746db 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -59,8 +59,6 @@ public: SelectionDAGISel(tm), TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - virtual void InstructionSelect(); - // Pass Name virtual const char *getPassName() const { return "MIPS DAG->DAG Pattern Instruction Selection"; @@ -98,29 +96,10 @@ private: inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); } - - - #ifndef NDEBUG - unsigned Indent; - #endif }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void MipsDAGToDAGISel::InstructionSelect() { - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); -} /// getGlobalBaseReg - Output the instructions required to put the /// GOT address into a register. 
@@ -329,17 +308,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent += 2); + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent -= 2); + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); return NULL; } @@ -547,14 +520,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs().indent(Indent-2) << "=> "); + DEBUG(errs() << "=> "); if (ResNode == NULL || ResNode == Node) DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); DEBUG(errs() << "\n"); - DEBUG(Indent -= 2); - return ResNode; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index d94944f..584b887 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -510,7 +510,8 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, MipsII::MO_GOT); SDValue ResNode = DAG.getLoad(MVT::i32, dl, - DAG.getEntryNode(), GA, NULL, 0); + DAG.getEntryNode(), GA, NULL, 0, + false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. if (!GV->hasLocalLinkage() || isa<Function>(GV)) @@ -549,7 +550,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) SDValue Ops[] = { JTI }; HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1); } else // Emit Load from Global Pointer - HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0); + HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, NULL, 0, + false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); @@ -586,7 +588,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_GOT); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), - CP, NULL, 0); + CP, NULL, 0, false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -601,7 +603,8 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1), SV, 0, + false, false, 0); } //===----------------------------------------------------------------------===// @@ -859,7 +862,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // emit ISD::STORE whichs stores the // parameter value to a stack Location - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); } // Transform all store nodes into one single node because all store @@ -933,7 +937,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Reload GP value. 
FI = MipsFI->getGPFI(); SDValue FIN = DAG.getFrameIndex(FI,getPointerTy()); - SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0); + SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, NULL, 0, + false, false, 0); Chain = GPLoad.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), GPLoad, SDValue(0,0)); @@ -1097,7 +1102,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1132,7 +1138,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(4, 0, true, false); MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0)); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, + false, false, 0)); // Record the frame index of the first variable argument // which is a value necessary to VASTART. diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e67bcbf..cef3697 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -120,7 +120,7 @@ def immZExt5 : PatLeaf<(imm), [{ // Mips Address Mode! SDNode frameindex could possibily be a match // since load and store instructions from stack used it. -def addr : ComplexPattern<i32, 2, "SelectAddr", [frameindex], []>; +def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>; //===----------------------------------------------------------------------===// // Instructions specific format @@ -300,9 +300,8 @@ class JumpFR<bits<6> op, bits<6> func, string instr_asm>: // Jump and Link (Call) let isCall=1, hasDelaySlot=1, // All calls clobber the non-callee saved registers... 
- Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, - K0, K1, F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, - F14, F15, F16, F17, F18, F19], Uses = [GP] in { + Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in { class JumpLink<bits<6> op, string instr_asm>: FJ< op, (outs), @@ -593,8 +592,8 @@ def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)), (JAL tglobaladdr:$dst)>; def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), (JAL texternalsym:$dst)>; -def : Pat<(MipsJmpLink CPURegs:$dst), - (JALR CPURegs:$dst)>; +//def : Pat<(MipsJmpLink CPURegs:$dst), +// (JALR CPURegs:$dst)>; // hi/lo relocs def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 32e0436..237b160 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -10,7 +10,7 @@ #ifndef LLVM_TARGET_MIPS_TARGETOBJECTFILE_H #define LLVM_TARGET_MIPS_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index 72f7c16..44a6cc0 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -106,8 +106,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { DbgInfo.BeginFunction(MF); // Now emit the instructions of function in its code section. - const MCSection *fCodeSection - = getObjFileLowering().SectionForCode(CurrentFnSym->getName()); + const MCSection *fCodeSection = + getObjFileLowering().SectionForCode(CurrentFnSym->getName(), + PAN::isISR(F->getSection())); // Start the Code Section. O << "\n"; @@ -157,6 +158,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // printOperand - print operand of insn. void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand(opNum); + const Function *F = MI->getParent()->getParent()->getFunction(); switch (MO.getType()) { case MachineOperand::MO_Register: @@ -189,19 +191,18 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { } case MachineOperand::MO_ExternalSymbol: { const char *Sname = MO.getSymbolName(); + std::string Printname = Sname; - // If its a libcall name, record it to decls section. - if (PAN::getSymbolTag(Sname) == PAN::LIBCALL) - LibcallDecls.push_back(Sname); - - // Record a call to intrinsic to print the extern declaration for it. - std::string Sym = Sname; - if (PAN::isMemIntrinsic(Sym)) { - Sym = PAN::addPrefix(Sym); - LibcallDecls.push_back(createESName(Sym)); + // Intrinsic stuff needs to be renamed if we are printing IL fn. + if (PAN::isIntrinsicStuff(Printname)) { + if (PAN::isISR(F->getSection())) { + Printname = PAN::Rename(Sname); + } + // Record these decls, we need to print them in asm as extern. 
+ LibcallDecls.push_back(createESName(Printname)); } - O << Sym; + O << Printname; break; } case MachineOperand::MO_MachineBasicBlock: @@ -247,8 +248,6 @@ void PIC16AsmPrinter::printLibcallDecls() { for (std::list<const char*>::const_iterator I = LibcallDecls.begin(); I != LibcallDecls.end(); I++) { O << MAI->getExternDirective() << *I << "\n"; - O << MAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n"; - O << MAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n"; } O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n"; } diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h index e18ddf1..4c1a8da 100644 --- a/lib/Target/PIC16/PIC16ABINames.h +++ b/lib/Target/PIC16/PIC16ABINames.h @@ -178,18 +178,21 @@ namespace llvm { return Func1 + tag; } + // Get the retval label for the given function. static std::string getRetvalLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(RET_LABEL); return Func1 + tag; } + // Get the argument label for the given function. static std::string getArgsLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(ARGS_LABEL); return Func1 + tag; } + // Get the tempdata label for the given function. static std::string getTempdataLabel(const std::string &Func) { std::string Func1 = addPrefix(Func); std::string tag = getTagName(TEMPS_LABEL); @@ -263,6 +266,7 @@ namespace llvm { return false; } + inline static bool isMemIntrinsic (const std::string &Name) { if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 || Name.compare("@memmove") == 0) { @@ -272,6 +276,41 @@ namespace llvm { return false; } + // Currently names of libcalls are assigned during TargetLowering + // object construction. There is no provision to change the when the + // code for a function IL function being generated. + // So we have to change these names while printing assembly. + // We need to do that mainly for names related to intrinsics. This + // function returns true if a name needs to be cloned. + inline static bool isIntrinsicStuff(const std::string &Name) { + // Return true if the name contains LIBCALL marker, or a MemIntrinisc. + // these are mainly ARGS_LABEL, RET_LABEL, and the LIBCALL name itself. + if ((Name.find(getTagName(LIBCALL)) != std::string::npos) + || isMemIntrinsic(Name)) + return true; + + return false; + } + + // Rename the name for IL. + inline static std::string Rename(const std::string &Name) { + std::string Newname; + // If its a label (LIBCALL+Func+LABEL), change it to + // (LIBCALL+Func+IL+LABEL). + TAGS id = getSymbolTag(Name); + if (id == ARGS_LABEL || id == RET_LABEL) { + std::size_t pos = Name.find(getTagName(id)); + Newname = Name.substr(0, pos) + ".IL" + getTagName(id); + return Newname; + } + + // Else, just append IL to name. + return Name + ".IL"; + } + + + + inline static bool isLocalToFunc (std::string &Func, std::string &Var) { if (! isLocalName(Var)) return false; @@ -325,6 +364,35 @@ namespace llvm { return o.str(); } + + // Return true if the current function is an ISR + inline static bool isISR(const std::string SectName) { + if (SectName.find("interrupt") != std::string::npos) + return true; + + return false; + } + + // Return the address for ISR starts in rom. + inline static std::string getISRAddr(void) { + return "0x4"; + } + + // Returns the name of clone of a function. 
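    // Illustrative examples for the two helpers below (hypothetical function
    // and variable names, following the fun.auto.var pattern noted in
    // getCloneVarName):
    //   getCloneFnName("foo")                -> "foo.IL"
    //   getCloneVarName("foo", "foo.auto.i") -> "foo.IL.auto.i"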
+ static std::string getCloneFnName(const std::string &Func) { + return (Func + ".IL"); + } + + // Returns the name of clone of a variable. + static std::string getCloneVarName(const std::string &Fn, + const std::string &Var) { + std::string cloneVarName = Var; + // These vars are named like fun.auto.var. + // Just replace the function name, with clone function name. + std::string cloneFnName = getCloneFnName(Fn); + cloneVarName.replace(cloneVarName.find(Fn), Fn.length(), cloneFnName); + return cloneVarName; + } }; // class PAN. } // end namespace llvm; diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index c517b1b..877e4ff 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -419,7 +419,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num, if (TagName != "") O << ", " << TagName; for (int i = 0; i<Num; i++) - O << "," << Aux[i]; + O << "," << (Aux[i] && 0xff); } /// EmitSymbol - Emit .def for a symbol. Value is offset for the member. diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp index 82197ae..6cbd002 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -14,10 +14,7 @@ #define DEBUG_TYPE "pic16-isel" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "PIC16ISelDAGToDAG.h" -#include "llvm/Support/Debug.h" - using namespace llvm; /// createPIC16ISelDag - This pass converts a legalized DAG into a @@ -27,13 +24,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) { } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void PIC16DAGToDAGISel::InstructionSelect() { - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. SDNode* PIC16DAGToDAGISel::Select(SDNode *N) { diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index 813a540..8ed5bf7 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -19,6 +19,8 @@ #include "PIC16TargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" #include "llvm/Intrinsics.h" using namespace llvm; @@ -46,8 +48,6 @@ public: return "PIC16 DAG->DAG Pattern Instruction Selection"; } - virtual void InstructionSelect(); - private: // Include the pieces autogenerated from the target description. 
#include "PIC16GenDAGISel.inc" diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 7754a4f..d17abb9 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -419,8 +419,7 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl, - DAG.GetOrdering(DAG.getEntryNode().getNode())); + Callee, Args, DAG, dl); return CallInfo.first; } @@ -622,12 +621,12 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { ChainHi = Chain.getOperand(1); } SDValue Store1 = DAG.getStore(ChainLo, dl, SrcLo, Ptr, NULL, - 0 + StoreOffset); + 0 + StoreOffset, false, false, 0); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); SDValue Store2 = DAG.getStore(ChainHi, dl, SrcHi, Ptr, NULL, - 1 + StoreOffset); + 1 + StoreOffset, false, false, 0); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -1513,8 +1512,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalAndProfitableToFold(Op.getOperand(0).getNode(), - Op.getNode(), Op.getNode())) + if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode())) return false; else MemOp = 0; @@ -1528,10 +1526,24 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, return true; if (isDirectLoad(Op.getOperand(1))) { - if (Op.getOperand(1).hasOneUse()) - return false; - else - MemOp = 1; + if (Op.getOperand(1).hasOneUse()) { + // Legal and profitable folding check uses the NodeId of DAG nodes. + // This NodeId is assigned by topological order. Therefore first + // assign topological order then perform legal and profitable check. + // Note:- Though this ordering is done before begining with legalization, + // newly added node during legalization process have NodeId=-1 (NewNode) + // therefore before performing any check proper ordering of the node is + // required. + DAG.AssignTopologicalOrder(); + + // Direct load operands are folded in binary operations. But before folding + // verify if this folding is legal. Fold only if it is legal otherwise + // convert this direct load to a separate memory operation. + if(ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode())) + return false; + else + MemOp = 1; + } } return true; } diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index cc71b04..ab81ed1 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -59,6 +59,7 @@ namespace { const TargetInstrInfo *TII; // Machine instruction info. MachineBasicBlock *MBB; // Current basic block std::string CurBank; + int PageChanged; }; char MemSelOpt::ID = 0; @@ -93,10 +94,56 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { // Let us assume that when entering a basic block now bank is selected. // Ideally we should look at the predecessors for this information. 
CurBank=""; + PageChanged=0; - for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { + MachineBasicBlock::iterator I; + for (I = BB.begin(); I != BB.end(); ++I) { Changed |= processInstruction(I); + + // if the page has changed insert a page sel before + // any instruction that needs one + if (PageChanged == 1) + { + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by the goto terminator or the fall thru + // basic blcok. + // If the terminator is return, we don't need to restore since there + // is no goto or fall thru basic block. + if ((I->getOpcode() == PIC16::sublw_3) || //macro has goto + (I->getOpcode() == PIC16::sublw_6) || //macro has goto + (I->getOpcode() == PIC16::addlwc) || //macro has goto + (TII->get(I->getOpcode()).isBranch())) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } } + + // The basic block is over, but if we did not find any goto yet, + // we haven't restored the page. + // Restore the page if it was changed, before leaving the basic block, + // because it may be required by fall thru basic blcok. + // If the terminator is return, we don't need to restore since there + // is fall thru basic block. + if (PageChanged == 1) { + // save the end pointer before we move back to last insn. + MachineBasicBlock::iterator J = I; + I--; + const TargetInstrDesc &TID = TII->get(I->getOpcode()); + if (! TID.isReturn()) + { + DebugLoc dl = I->getDebugLoc(); + BuildMI(*MBB, J, dl, + TII->get(PIC16::pagesel)).addExternalSymbol("$"); + Changed = true; + PageChanged = 0; + } + } + + return Changed; } @@ -112,42 +159,74 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore())) return false; + // The first thing we should do is that record if banksel/pagesel are + // changed in an unknown way. This can happend via any type of call. + // We do it here first before scanning of MemOp / BBOp as the indirect + // call insns do not have any operands, but they still may change bank/page. + if (TID.isCall()) { + // Record that we have changed the page, so that we can restore it + // before basic block ends. + // We require to signal that a page anc bank change happened even for + // indirect calls. + PageChanged = 1; + + // When a call is made, there may be banksel for variables in callee. + // Hence the banksel in caller needs to be reset. + CurBank = ""; + } + // Scan for the memory address operand. // FIXME: Should we use standard interfaces like memoperands_iterator, // hasMemOperand() etc ? int MemOpPos = -1; + int BBOpPos = -1; for (unsigned i = 0; i < NumOperands; i++) { MachineOperand Op = MI->getOperand(i); if (Op.getType() == MachineOperand::MO_GlobalAddress || - Op.getType() == MachineOperand::MO_ExternalSymbol || - Op.getType() == MachineOperand::MO_MachineBasicBlock) { + Op.getType() == MachineOperand::MO_ExternalSymbol) { // We found one mem operand. Next one may be BS. MemOpPos = i; - break; + } + if (Op.getType() == MachineOperand::MO_MachineBasicBlock) { + // We found one BB operand. Next one may be pagesel. + BBOpPos = i; } } // If we did not find an insn accessing memory. Continue. - if (MemOpPos == -1) return Changed; + if ((MemOpPos == -1) && + (BBOpPos == -1)) + return false; + assert ((BBOpPos != MemOpPos) && "operand can only be of one type"); - // Get the MemOp. 
- MachineOperand &Op = MI->getOperand(MemOpPos); // If this is a pagesel material, handle it first. - if (MI->getOpcode() == PIC16::CALL || - MI->getOpcode() == PIC16::br_uncond) { + // CALL and br_ucond insns use MemOp (GA or ES) and not BBOp. + // Pagesel is required only for a direct call. + if ((MI->getOpcode() == PIC16::CALL)) { + // Get the BBOp. + MachineOperand &MemOp = MI->getOperand(MemOpPos); DebugLoc dl = MI->getDebugLoc(); - BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)). - addOperand(Op); - return true; + BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).addOperand(MemOp); + + // CALL and br_ucond needs only pagesel. so we are done. + return true; } + // Pagesel is handled. Now, add a Banksel if needed. + if (MemOpPos == -1) return Changed; + // Get the MemOp. + MachineOperand &Op = MI->getOperand(MemOpPos); + // Get the section name(NewBank) for MemOp. // This assumes that the section names for globals are already set by // AsmPrinter->doInitialization. std::string NewBank = CurBank; + bool hasExternalLinkage = false; if (Op.getType() == MachineOperand::MO_GlobalAddress && Op.getGlobal()->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE) { + if (Op.getGlobal()->hasExternalLinkage()) + hasExternalLinkage= true; NewBank = Op.getGlobal()->getSection(); } else if (Op.getType() == MachineOperand::MO_ExternalSymbol) { // External Symbol is generated for temp data and arguments. They are @@ -162,7 +241,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) { // If the previous and new section names are same, we don't need to // emit banksel. - if (NewBank.compare(CurBank) != 0 ) { + if (NewBank.compare(CurBank) != 0 || hasExternalLinkage) { DebugLoc dl = MI->getDebugLoc(); BuildMI(*MBB, MI, dl, TII->get(PIC16::banksel)). addOperand(Op); diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp new file mode 100644 index 0000000..865da35 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -0,0 +1,299 @@ +//===-- PIC16Cloner.cpp - PIC16 LLVM Cloner for shared functions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to clone all functions that are shared between +// the main line code (ML) and interrupt line code (IL). It clones all such +// shared functions and their automatic global vars by adding the .IL suffix. +// +// This pass is supposed to be run on the linked .bc module. +// It traveses the module call graph twice. Once starting from the main function +// and marking each reached function as "ML". Again, starting from the ISR +// and cloning any reachable function that was marked as "ML". After cloning +// the function, it remaps all the call sites in IL functions to call the +// cloned functions. 
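// For example (illustrative only): if both main() and an ISR call a shared
// helper foo(), this pass clones foo() into foo.IL, clones foo's emitted
// auto globals (e.g. foo.auto.x becomes foo.IL.auto.x), and rewrites the
// call site on the interrupt line to use foo.IL, while the main-line call
// to foo() is left untouched.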
+//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Pass.h" +#include "llvm/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "PIC16Cloner.h" +#include "../PIC16ABINames.h" +#include <vector> + +using namespace llvm; +using std::vector; +using std::string; +using std::map; + +namespace llvm { + char PIC16Cloner::ID = 0; + + ModulePass *createPIC16ClonerPass() { return new PIC16Cloner(); } +} + +// We currently intend to run these passes in opt, which does not have any +// diagnostic support. So use these functions for now. In future +// we will probably write our own driver tool. +// +void PIC16Cloner::reportError(string ErrorString) { + errs() << "ERROR : " << ErrorString << "\n"; + exit(1); +} + +void PIC16Cloner:: +reportError (string ErrorString, vector<string> &Values) { + unsigned ValCount = Values.size(); + string TargetString; + for (unsigned i=0; i<ValCount; ++i) { + TargetString = "%"; + TargetString += ((char)i + '0'); + ErrorString.replace(ErrorString.find(TargetString), TargetString.length(), + Values[i]); + } + errs() << "ERROR : " << ErrorString << "\n"; + exit(1); +} + + +// Entry point +// +bool PIC16Cloner::runOnModule(Module &M) { + CallGraph &CG = getAnalysis<CallGraph>(); + + // Search for the "main" and "ISR" functions. + CallGraphNode *mainCGN = NULL, *isrCGN = NULL; + for (CallGraph::iterator it = CG.begin() ; it != CG.end(); it++) + { + // External calling node doesn't have any function associated with it. + if (! it->first) + continue; + + if (it->first->getName().str() == "main") { + mainCGN = it->second; + } + + if (PAN::isISR(it->first->getSection())) { + isrCGN = it->second; + } + + // Don't search further if we've found both. + if (mainCGN && isrCGN) + break; + } + + // We have nothing to do if any of the main or ISR is missing. + if (! mainCGN || ! isrCGN) return false; + + // Time for some diagnostics. + // See if the main itself is interrupt function then report an error. + if (PAN::isISR(mainCGN->getFunction()->getSection())) { + reportError("Function 'main' can't be interrupt function"); + } + + + // Mark all reachable functions from main as ML. + markCallGraph(mainCGN, "ML"); + + // And then all the functions reachable from ISR will be cloned. + cloneSharedFunctions(isrCGN); + + return true; +} + +// Mark all reachable functions from the given node, with the given mark. +// +void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) { + // Mark the top node first. + Function *thisF = CGN->getFunction(); + + thisF->setSection(StringMark); + + // Mark all the called functions + for(CallGraphNode::iterator cgn_it = CGN->begin(); + cgn_it != CGN->end(); ++cgn_it) { + Function *CalledF = cgn_it->second->getFunction(); + + // If calling an external function then CallGraphNode + // will not be associated with any function. + if (! CalledF) + continue; + + // Issue diagnostic if interrupt function is being called. + if (PAN::isISR(CalledF->getSection())) { + vector<string> Values; + Values.push_back(CalledF->getName().str()); + reportError("Interrupt function (%0) can't be called", Values); + } + + // Has already been mark + if (CalledF->getSection().find(StringMark) != string::npos) { + // Should we do anything here? + } else { + // Mark now + CalledF->setSection(StringMark); + } + + // Before going any further mark all the called function by current + // function. 
+ markCallGraph(cgn_it->second ,StringMark); + } // end of loop of all called functions. +} + + +// For PIC16, automatic variables of a function are emitted as globals. +// Clone the auto variables of a function and put them in ValueMap, +// this ValueMap will be used while +// Cloning the code of function itself. +// +void PIC16Cloner::CloneAutos(Function *F) { + // We'll need to update module's globals list as well. So keep a reference + // handy. + Module *M = F->getParent(); + Module::GlobalListType &Globals = M->getGlobalList(); + + // Clear the leftovers in ValueMap by any previous cloning. + ValueMap.clear(); + + // Find the auto globls for this function and clone them, and put them + // in ValueMap. + std::string FnName = F->getName().str(); + std::string VarName, ClonedVarName; + for (Module::global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + VarName = I->getName().str(); + if (PAN::isLocalToFunc(FnName, VarName)) { + // Auto variable for current function found. Clone it. + GlobalVariable *GV = I; + + const Type *InitTy = GV->getInitializer()->getType(); + GlobalVariable *ClonedGV = + new GlobalVariable(InitTy, false, GV->getLinkage(), + GV->getInitializer()); + ClonedGV->setName(PAN::getCloneVarName(FnName, VarName)); + // Add these new globals to module's globals list. + Globals.push_back(ClonedGV); + + // Update ValueMap. + ValueMap[GV] = ClonedGV; + } + } +} + + +// Clone all functions that are reachable from ISR and are already +// marked as ML. +// +void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) { + + // Check all the called functions from ISR. + for(CallGraphNode::iterator cgn_it = CGN->begin(); + cgn_it != CGN->end(); ++cgn_it) { + Function *CalledF = cgn_it->second->getFunction(); + + // If calling an external function then CallGraphNode + // will not be associated with any function. + if (!CalledF) + continue; + + // Issue diagnostic if interrupt function is being called. + if (PAN::isISR(CalledF->getSection())) { + vector<string> Values; + Values.push_back(CalledF->getName().str()); + reportError("Interrupt function (%0) can't be called", Values); + } + + if (CalledF->getSection().find("ML") != string::npos) { + // Function is alternatively marked. It should be a shared one. + // Create IL copy. Passing called function as first argument + // and the caller as the second argument. + + // Before making IL copy, first ensure that this function has a + // body. If the function does have a body. It can't be cloned. + // Such a case may occur when the function has been declarated + // in the C source code but its body exists in assembly file. + if (!CalledF->isDeclaration()) { + Function *cf = cloneFunction(CalledF); + remapAllSites(CGN->getFunction(), CalledF, cf); + } else { + // It is called only from ISR. Still mark it as we need this info + // in code gen while calling intrinsics.Function is not marked. + CalledF->setSection("IL"); + } + } + // Before going any further clone all the shared function reachaable + // by current function. + cloneSharedFunctions(cgn_it->second); + } // end of loop of all called functions. +} + +// Clone the given function and return it. +// Note: it uses the ValueMap member of the class, which is already populated +// by cloneAutos by the time we reach here. +// FIXME: Should we just pass ValueMap's ref as a parameter here? rather +// than keeping the ValueMap as a member. +Function * +PIC16Cloner::cloneFunction(Function *OrgF) { + Function *ClonedF; + + // See if we already cloned it. Return that. 
+ cloned_map_iterator cm_it = ClonedFunctionMap.find(OrgF); + if(cm_it != ClonedFunctionMap.end()) { + ClonedF = cm_it->second; + return ClonedF; + } + + // Clone does not exist. + // First clone the autos, and populate ValueMap. + CloneAutos(OrgF); + + // Now create the clone. + ClonedF = CloneFunction(OrgF, ValueMap); + + // The new function should be for interrupt line. Therefore should have + // the name suffixed with IL and section attribute marked with IL. + ClonedF->setName(PAN::getCloneFnName(OrgF->getName())); + ClonedF->setSection("IL"); + + // Add the newly created function to the module. + OrgF->getParent()->getFunctionList().push_back(ClonedF); + + // Update the ClonedFunctionMap to record this cloning activity. + ClonedFunctionMap[OrgF] = ClonedF; + + return ClonedF; +} + + +// Remap the call sites of shared functions, that are in IL. +// Change the IL call site of a shared function to its clone. +// +void PIC16Cloner:: +remapAllSites(Function *Caller, Function *OrgF, Function *Clone) { + // First find the caller to update. If the caller itself is cloned + // then use the cloned caller. Otherwise use it. + cloned_map_iterator cm_it = ClonedFunctionMap.find(Caller); + if (cm_it != ClonedFunctionMap.end()) + Caller = cm_it->second; + + // For the lack of a better call site finding mechanism, iterate over + // all insns to find the uses of original fn. + for (Function::iterator BI = Caller->begin(); BI != Caller->end(); ++BI) { + BasicBlock &BB = *BI; + for (BasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) { + if (II->getNumOperands() > 0 && II->getOperand(0) == OrgF) + II->setOperand(0, Clone); + } + } +} + + + diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h new file mode 100644 index 0000000..24c1152 --- /dev/null +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h @@ -0,0 +1,83 @@ +//===-- PIC16Cloner.h - PIC16 LLVM Cloner for shared functions --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains declaration of a cloner class clone all functions that +// are shared between the main line code (ML) and interrupt line code (IL). +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16CLONER_H +#define PIC16CLONER_H + +#include "llvm/ADT/DenseMap.h" + +using namespace llvm; +using std::vector; +using std::string; +using std::map; + +namespace llvm { + // forward classes. + class Value; + class Function; + class Module; + class ModulePass; + class CallGraph; + class CallGraphNode; + class AnalysisUsage; + + class PIC16Cloner : public ModulePass { + public: + static char ID; // Class identification + PIC16Cloner() : ModulePass(&ID) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<CallGraph>(); + } + virtual bool runOnModule(Module &M); + + private: // Functions + // Mark reachable functions for the MainLine or InterruptLine. + void markCallGraph(CallGraphNode *CGN, string StringMark); + + // Clone auto variables of function specified. + void CloneAutos(Function *F); + + // Clone the body of a function. + Function *cloneFunction(Function *F); + + // Clone all shared functions. + void cloneSharedFunctions(CallGraphNode *isrCGN); + + // Remap all call sites to the shared function. 
+ void remapAllSites(Function *Caller, Function *OrgF, Function *Clone); + + // Error reporting for PIC16Pass + void reportError(string ErrorString, vector<string> &Values); + void reportError(string ErrorString); + + private: // Data + // Records whether the interrupt function has already been found. + // If more than one interrupt function is found, then an error + // should be reported. + bool foundISR; + + // This ValueMap maps the auto variables of the original functions to + // the corresponding cloned auto variables of the cloned function. + // This value map is passed during function cloning so that all the + // uses of auto variables are updated properly. + DenseMap<const Value*, Value*> ValueMap; + + // Map of already cloned functions. + map<Function *, Function *> ClonedFunctionMap; + typedef map<Function *, Function *>::iterator cloned_map_iterator; + }; +} // End of llvm namespace + +#endif diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp index 197c398..5ecb6aa 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp @@ -24,27 +24,27 @@ using namespace llvm; namespace llvm { - char PIC16FrameOverlay::ID = 0; - ModulePass *createPIC16OverlayPass() { return new PIC16FrameOverlay(); } + char PIC16Overlay::ID = 0; + ModulePass *createPIC16OverlayPass() { return new PIC16Overlay(); } } -void PIC16FrameOverlay::getAnalysisUsage(AnalysisUsage &AU) const { +void PIC16Overlay::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<CallGraph>(); } -void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { +void PIC16Overlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { // Do not set any color for external calling node. if (Depth != 0 && CGN->getFunction()) { unsigned Color = getColor(CGN->getFunction()); // Handle indirectly called functions - if (Color >= PIC16Overlay::StartIndirectCallColor || - Depth >= PIC16Overlay::StartIndirectCallColor) { + if (Color >= PIC16OVERLAY::StartIndirectCallColor || + Depth >= PIC16OVERLAY::StartIndirectCallColor) { // All functions called from an indirectly called function are given // an unique color.
- if (Color < PIC16Overlay::StartIndirectCallColor && - Depth >= PIC16Overlay::StartIndirectCallColor) + if (Color < PIC16OVERLAY::StartIndirectCallColor && + Depth >= PIC16OVERLAY::StartIndirectCallColor) setColor(CGN->getFunction(), Depth); for (unsigned int i = 0; i < CGN->size(); i++) @@ -65,7 +65,7 @@ void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) { DFSTraverse((*CGN)[i], Depth+1); } -unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN, +unsigned PIC16Overlay::ModifyDepthForInterrupt(CallGraphNode *CGN, unsigned Depth) { Function *Fn = CGN->getFunction(); @@ -81,7 +81,7 @@ unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN, return Depth; } -void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) { +void PIC16Overlay::setColor(Function *Fn, unsigned Color) { std::string Section = ""; if (Fn->hasSection()) Section = Fn->getSection(); @@ -119,7 +119,7 @@ void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) { Fn->setSection(Section); } -unsigned PIC16FrameOverlay::getColor(Function *Fn) { +unsigned PIC16Overlay::getColor(Function *Fn) { int Color = 0; if (!Fn->hasSection()) return 0; @@ -150,7 +150,7 @@ unsigned PIC16FrameOverlay::getColor(Function *Fn) { return Color; } -bool PIC16FrameOverlay::runOnModule(Module &M) { +bool PIC16Overlay::runOnModule(Module &M) { CallGraph &CG = getAnalysis<CallGraph>(); CallGraphNode *ECN = CG.getExternalCallingNode(); @@ -164,7 +164,7 @@ bool PIC16FrameOverlay::runOnModule(Module &M) { return false; } -void PIC16FrameOverlay::MarkIndirectlyCalledFunctions(Module &M) { +void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) { // If the use of a function is not a call instruction then this // function might be called indirectly. In that case give it // an unique color. diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h index d70c4e7..5a2551f 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h @@ -1,4 +1,4 @@ -//===-- PIC16FrameOverlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===// +//===-- PIC16Overlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,30 +14,35 @@ #ifndef PIC16FRAMEOVERLAY_H #define PIC16FRAMEOVERLAY_H -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Pass.h" -#include "llvm/CallGraphSCCPass.h" using std::string; using namespace llvm; namespace llvm { - namespace PIC16Overlay { + // Forward declarations. 
+ class Function; + class Module; + class ModulePass; + class AnalysisUsage; + class CallGraphNode; + class CallGraph; + + namespace PIC16OVERLAY { enum OverlayConsts { StartInterruptColor = 200, StartIndirectCallColor = 300 }; } - class PIC16FrameOverlay : public ModulePass { + class PIC16Overlay : public ModulePass { std::string OverlayStr; unsigned InterruptDepth; unsigned IndirectCallColor; public: static char ID; // Class identification - PIC16FrameOverlay() : ModulePass(&ID) { + PIC16Overlay() : ModulePass(&ID) { OverlayStr = "Overlay="; - InterruptDepth = PIC16Overlay::StartInterruptColor; - IndirectCallColor = PIC16Overlay::StartIndirectCallColor; + InterruptDepth = PIC16OVERLAY::StartInterruptColor; + IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor; } virtual void getAnalysisUsage(AnalysisUsage &AU) const; diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index d7cfe02..b891c18 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -315,8 +315,12 @@ PIC16TargetObjectFile::allocateSHARED(const GlobalVariable *GV, // Interface used by AsmPrinter to get a code section for a function. const PIC16Section * -PIC16TargetObjectFile::SectionForCode(const std::string &FnName) const { +PIC16TargetObjectFile::SectionForCode(const std::string &FnName, + bool isISR) const { const std::string &sec_name = PAN::getCodeSectionName(FnName); + // If it is ISR, its code section starts at a specific address. + if (isISR) + return getPIC16Section(sec_name, CODE, PAN::getISRAddr()); return getPIC16Section(sec_name, CODE); } diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index 0b0ad43..cf8bf84 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -137,7 +137,8 @@ namespace llvm { /// Return a code section for a function. - const PIC16Section *SectionForCode (const std::string &FnName) const; + const PIC16Section *SectionForCode (const std::string &FnName, + bool isISR) const; /// Return a frame section for a function. 
const PIC16Section *SectionForFrame (const std::string &FnName) const; diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp index 46cc819..f1bdb12 100644 --- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp +++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp @@ -15,7 +15,8 @@ using namespace llvm; Target llvm::ThePIC16Target, llvm::TheCooperTarget; extern "C" void LLVMInitializePIC16TargetInfo() { - RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]"); + RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16", + "PIC16 14-bit [experimental]"); RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]"); } diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index afc90b1..ac901d0 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -31,13 +31,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index c7ce171..155fba2 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -66,28 +66,13 @@ def CC_PPC : CallingConv<[ // PowerPC System V Release 4 ABI //===----------------------------------------------------------------------===// -// _Complex arguments are never split, thus their two scalars are either -// passed both in argument registers or both on the stack. Also _Complex -// arguments are always passed in general purpose registers, never in -// Floating-point registers or vector registers. Arguments which should go -// on the stack are marked with the inreg parameter attribute. -// Giving inreg this target-dependent (and counter-intuitive) meaning -// simplifies things, because functions calls are not always coming from the -// frontend but are also created implicitly e.g. for libcalls. If inreg would -// actually mean that the argument is passed in a register, then all places -// which create function calls/function definitions implicitly would need to -// be aware of this fact and would need to mark arguments accordingly. With -// inreg meaning that the argument is passed on the stack, this is not an -// issue, except for calls which involve _Complex types. - def CC_PPC_SVR4_Common : CallingConv<[ // The ABI requires i64 to be passed in two adjacent registers with the first // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>, // The first 8 integer arguments are passed in integer registers. - CCIfType<[i32], CCIf<"!ArgFlags.isInReg()", - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. 
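(Aside on the calling-convention hunk above: the CC_PPC_SVR4_Custom_AlignArgRegs handler it references enforces the pairing rule stated in the comment, namely that a split i64 must occupy two adjacent GPRs whose first register is odd-numbered, i.e. R3:R4, R5:R6, R7:R8 or R9:R10. The short standalone sketch below only models that rule with plain integers; alignI64ArgReg and its register bounds are illustrative assumptions, not the actual LLVM handler.)

// A minimal, self-contained model of the SVR4 i64 register-pairing rule.
#include <cassert>
#include <cstdio>

static const int FirstArgGPR = 3;   // Argument GPRs for 32-bit SVR4: R3..R10.
static const int LastArgGPR  = 10;

// Given the next free argument GPR, return the register that should receive
// the first half of a split i64, skipping one register if needed so the pair
// starts on an odd-numbered GPR. Returns 0 if the value must go to the stack.
static int alignI64ArgReg(int NextFreeGPR) {
  if (NextFreeGPR % 2 == 0)   // Pair would start on an even register:
    ++NextFreeGPR;            // burn one register to restore alignment.
  if (NextFreeGPR + 1 > LastArgGPR)
    return 0;                 // Not enough registers left for the pair.
  return NextFreeGPR;
}

int main() {
  assert(alignI64ArgReg(3) == 3);   // i64 as first argument -> R3:R4
  assert(alignI64ArgReg(4) == 5);   // R4 is skipped         -> R5:R6
  assert(alignI64ArgReg(9) == 9);   // last usable pair      -> R9:R10
  assert(alignI64ArgReg(10) == 0);  // no pair left -> passed on the stack
  std::printf("i64 pairing rule holds for R%d..R%d\n", FirstArgGPR, LastArgGPR);
  return 0;
}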
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 3a15f7e..66dfd4b 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -257,7 +257,7 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { case PPC::STWX: case PPC::STWX8: case PPC::STWUX: case PPC::STW: case PPC::STW8: - case PPC::STWU: case PPC::STWU8: + case PPC::STWU: case PPC::STVEWX: case PPC::STFIWX: case PPC::STWBRX: diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 004997f..9d79c0d 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -156,10 +156,6 @@ namespace { SDValue BuildSDIVSequence(SDNode *N); SDValue BuildUDIVSequence(SDNode *N); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - void InsertVRSaveCode(MachineFunction &MF); virtual const char *getPassName() const { @@ -184,14 +180,6 @@ private: }; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void PPCDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - /// InsertVRSaveCode - Once the entire function has been instruction selected, /// all virtual registers are created and all machine instructions are built, /// check to see if we need to save/restore VRSAVE. If so, do it. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index a11d624..3d81afa 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -1243,7 +1243,8 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // If the global is weak or external, we have to go through the lazy // resolution stub. 
- return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { @@ -1333,7 +1334,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__trampoline_setup", PtrVT), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -1355,7 +1356,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. @@ -1405,25 +1407,29 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, - Op.getOperand(1), SV, 0, MVT::i8); + Op.getOperand(1), SV, 0, MVT::i8, + false, false, 0); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = - DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8); + DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8, + false, false, 0); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = - DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset); + DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset, + false, false, 0); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers - return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset); + return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset, + false, false, 0); } @@ -1628,7 +1634,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Create load nodes to retrieve arguments from the stack. 
SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -1700,7 +1707,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned GPRIndex = 0; for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1714,7 +1722,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -1729,7 +1738,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned FPRIndex = 0; for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); - SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1741,7 +1751,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, @@ -1903,7 +1914,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); + NULL, 0, + ObjSize==1 ? 
MVT::i8 : MVT::i16, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -1921,7 +1934,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -2045,7 +2059,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset + (ArgSize - ObjSize), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, + false, false, 0); } InVals.push_back(ArgVal); @@ -2091,7 +2106,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); @@ -2271,7 +2287,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, // Store relative to framepointer. MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, PseudoSourceValue::getFixedStack(FI), - 0)); + 0, false, false, 0)); } } @@ -2297,7 +2313,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewRetAddr), 0); + PseudoSourceValue::getFixedStack(NewRetAddr), 0, + false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. @@ -2308,7 +2325,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, true, false); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - PseudoSourceValue::getFixedStack(NewFPIdx), 0); + PseudoSourceValue::getFixedStack(NewFPIdx), 0, + false, false, 0); } } return Chain; @@ -2346,14 +2364,16 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); - LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); + LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0, + false, false, 0); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack // slot as the FP is never overwritten. 
if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); - FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); + FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0, + false, false, 0); Chain = SDValue(FPOpOut.getNode(), 1); } } @@ -2395,7 +2415,8 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); } - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0)); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); @@ -2862,7 +2883,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset)); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0)); } else { // Calculate and remember argument location. CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, @@ -3024,7 +3046,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, - NULL, 0, VT); + NULL, 0, VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3061,7 +3083,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -3092,19 +3115,22 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); if (isVarArg) { - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3147,10 +3173,12 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // entirely in R registers. Maybe later. 
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, PtrVT)); - SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0); + SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { - SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0); + SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } @@ -3160,7 +3188,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, PtrVT)); - SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0); + SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0, + false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3225,7 +3254,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // TOC save area offset. SDValue PtrOff = DAG.getIntPtrConstant(40); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0); + Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, NULL, 0, + false, false, 0); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -3300,13 +3330,15 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue SaveSP = Op.getOperand(1); // Load the old link SP. - SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0); + SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0, + false, false, 0); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. - return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0); + return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0, + false, false, 0); } @@ -3483,14 +3515,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0); + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0, + false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. if (Op.getValueType() == MVT::i32) FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -3533,7 +3567,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), Ops, 4, MVT::i64, MMO); // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0); + SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0, false, false, 0); // FCFID it and return it. 
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); @@ -3578,12 +3612,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); // Load FP Control Word from low 32 bits of stack slot. SDValue Four = DAG.getConstant(4, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -4249,9 +4284,11 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Store the input value into Value#0 of the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Op.getOperand(0), FIdx, NULL, 0); + Op.getOperand(0), FIdx, NULL, 0, + false, false, 0); // Load it out. - return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0); + return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { @@ -5460,7 +5497,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { // to the stack. FuncInfo->setLRStoreRequired(); return DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), RetAddrFI, NULL, 0); + DAG.getEntryNode(), RetAddrFI, NULL, 0, + false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 219efb9..a0781b9 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -366,7 +366,7 @@ def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>; + [(set G8RC:$rT, (adde G8RC:$rA, -1))]>; def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addze $rT, $rA", IntGeneral, [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; @@ -375,7 +375,7 @@ def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>; + [(set G8RC:$rT, (sube -1, G8RC:$rA))]>; def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfze $rT, $rA", IntGeneral, [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; @@ -635,13 +635,6 @@ def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff))]>, RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU8 : DForm_1<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStGeneral, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, s16immX4:$ptroff, ptr_rc:$ptrreg), diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index af7d812..9895bea 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ 
b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -19,6 +19,7 @@ #include "PPCTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -73,8 +74,7 @@ bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI, destReg = MI.getOperand(0).getReg(); return true; } - } else if (oc == PPC::FMRS || oc == PPC::FMRD || - oc == PPC::FMRSD) { // fmr r1, r2 + } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2 assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && @@ -344,10 +344,9 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); } else if (DestRC == PPC::G8RCRegisterClass) { BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::F4RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMRS), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::F8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMRD), DestReg).addReg(SrcReg); + } else if (DestRC == PPC::F4RCRegisterClass || + DestRC == PPC::F8RCRegisterClass) { + BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg); } else if (DestRC == PPC::CRRCRegisterClass) { BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg); } else if (DestRC == PPC::VRRCRegisterClass) { @@ -421,22 +420,30 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); return true; } else { - // FIXME: We use R0 here, because it isn't available for RA. We need to - // store the CR in the low 4-bits of the saved value. First, issue a MFCR - // to save all of the CRBits. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), PPC::R0)); + // FIXME: We need a scatch reg here. The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + + // We need to store the CR in the low 4-bits of the saved value. First, + // issue a MFCR to save all of the CRBits. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg)); // If the saved register wasn't CR0, shift the bits left so that they are // in CR0's slot. if (SrcReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(SrcReg)*4; - // rlwinm r0, r0, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(ShiftBits).addImm(0).addImm(31)); + // rlwinm scratch, scratch, ShiftBits, 0, 31. + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(ShiftBits) + .addImm(0).addImm(31)); } NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(PPC::R0, + .addReg(ScratchReg, getKillRegState(isKill)), FrameIdx)); } @@ -540,20 +547,28 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (RC == PPC::CRRCRegisterClass) { - // FIXME: We use R0 here, because it isn't available for RA. - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), PPC::R0), - FrameIdx)); + // FIXME: We need a scatch reg here. 
The trouble with using R0 is that + // it's possible for the stack frame to be so big the save location is + // out of range of immediate offsets, necessitating another register. + // We hack this on Darwin by reserving R2. It's probably broken on Linux + // at the moment. + unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? + PPC::R2 : PPC::R0; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + ScratchReg), FrameIdx)); // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { unsigned ShiftBits = PPCRegisterInfo::getRegisterNumbering(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), PPC::R0) - .addReg(PPC::R0).addImm(32-ShiftBits).addImm(0).addImm(31)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) + .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) + .addImm(31)); } - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg).addReg(PPC::R0)); + NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg) + .addReg(ScratchReg)); } else if (RC == PPC::CRBITRCRegisterClass) { unsigned Reg = 0; @@ -672,33 +687,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, getUndefRegState(isUndef)), FrameIndex); } - } else if (Opc == PPC::FMRD) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if (Opc == PPC::FMRS) { + } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) { + // The register may be F4RC or F8RC, and that determines the memory op. + unsigned OrigReg = MI->getOperand(OpNum).getReg(); + // We cannot tell the register class from a physreg alone. + if (TargetRegisterInfo::isPhysicalRegister(OrigReg)) + return NULL; + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg); + const bool is64 = RC == PPC::F8RCRegisterClass; + if (OpNum == 0) { // move -> store unsigned InReg = MI->getOperand(1).getReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS)) + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), + get(is64 ? PPC::STFD : PPC::STFS)) .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)), @@ -707,7 +710,8 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned OutReg = MI->getOperand(0).getReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS)) + NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), + get(is64 ? 
PPC::LFD : PPC::LFS)) .addReg(OutReg, RegState::Define | getDeadRegState(isDead) | @@ -733,7 +737,7 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, else if ((Opc == PPC::OR8 && MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) return true; - else if (Opc == PPC::FMRD || Opc == PPC::FMRS) + else if (Opc == PPC::FMR || Opc == PPC::FMRSD) return true; return false; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 842f8ee..845cd8f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1019,20 +1019,16 @@ let Uses = [RM] in { } } -/// FMR is split into 3 versions, one for 4/8 byte FP, and one for extending. +/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending. /// /// Note that these are defined as pseudo-ops on the PPC970 because they are /// often coalesced away and we don't want the dispatch group builder to think /// that they will fill slots (which could cause the load of a LSU reject to /// sneak into a d-group with a store). -def FMRS : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), - "fmr $frD, $frB", FPGeneral, - []>, // (set F4RC:$frD, F4RC:$frB) - PPC970_Unit_Pseudo; -def FMRD : XForm_26<63, 72, (outs F8RC:$frD), (ins F8RC:$frB), - "fmr $frD, $frB", FPGeneral, - []>, // (set F8RC:$frD, F8RC:$frB) - PPC970_Unit_Pseudo; +def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), + "fmr $frD, $frB", FPGeneral, + []>, // (set F4RC:$frD, F4RC:$frB) + PPC970_Unit_Pseudo; def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB), "fmr $frD, $frB", FPGeneral, [(set F8RC:$frD, (fextend F4RC:$frB))]>, @@ -1215,7 +1211,7 @@ def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>; + [(set GPRC:$rT, (adde GPRC:$rA, -1))]>; def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addze $rT, $rA", IntGeneral, [(set GPRC:$rT, (adde GPRC:$rA, 0))]>; @@ -1224,7 +1220,7 @@ def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>; + [(set GPRC:$rT, (sube -1, GPRC:$rA))]>; def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfze $rT, $rA", IntGeneral, [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; @@ -1480,11 +1476,11 @@ def : Pat<(extloadf32 xaddr:$src), (FMRSD (LFSX xaddr:$src))>; // Memory barriers -def : Pat<(membarrier (i32 imm:$ll), - (i32 imm:$ls), - (i32 imm:$sl), - (i32 imm:$ss), - (i32 imm:$device)), +def : Pat<(membarrier (i32 imm /*ll*/), + (i32 imm /*ls*/), + (i32 imm /*sl*/), + (i32 imm /*ss*/), + (i32 imm /*device*/)), (SYNC)>; include "PPCInstrAltivec.td" diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 20e77e7..0b509ac 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -427,6 +427,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need 
two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::R2); + } // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is over conservative, as it also prevents allocation of R31 @@ -447,6 +453,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } + // Reserve R2 on Darwin to hack around the problem of save/restore of CR + // when the stack frame is too big to address directly; we need two regs. + // This is a hack. + if (Subtarget.isDarwinABI()) { + Reserved.set(PPC::X2); + } } if (needsFP(MF)) diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 049e893..1cb7340 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -287,10 +287,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32, GPRCClass::allocation_order_begin(const MachineFunction &MF) const { // 32-bit SVR4 ABI: r2 is reserved for the OS. // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. - if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin()) - return begin()+1; - - return begin(); + // Darwin: R2 is reserved for CR save/restore sequence. + return begin()+1; } GPRCClass::iterator GPRCClass::allocation_order_end(const MachineFunction &MF) const { @@ -325,10 +323,8 @@ def G8RC : RegisterClass<"PPC", [i64], 64, G8RCClass::iterator G8RCClass::allocation_order_begin(const MachineFunction &MF) const { // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. - if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin()) - return begin()+1; - - return begin(); + // Darwin: r2 is reserved for CR save/restore sequence. + return begin()+1; } G8RCClass::iterator G8RCClass::allocation_order_end(const MachineFunction &MF) const { diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 22eecd4..cac6962 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -115,32 +115,3 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM, return false; } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! 
We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding PPCTargetMachine::getLSDAEncoding() const { - if (Subtarget.isDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index a654435..ac9ae2b 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -57,18 +57,6 @@ public: return InstrItins; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index 8f265cf..3465779 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -889,9 +889,26 @@ entry: ; recognize a more elaborate tree than a simple SETxx. define double @test_FNEG_sel(double %A, double %B, double %C) { - %D = sub double -0.000000e+00, %A ; <double> [#uses=1] + %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] ret double %E } +//===----------------------------------------------------------------------===// +The save/restore sequence for CR in prolog/epilog is terrible: +- Each CR subreg is saved individually, rather than doing one save as a unit. +- On Darwin, the save is done after the decrement of SP, which means the offset +from SP of the save slot can be too big for a store instruction, which means we +need an additional register (currently hacked in 96015+96020; the solution there +is correct, but poor). +- On SVR4 the same thing can happen, and I don't think saving before the SP +decrement is safe on that target, as there is no red zone. This is currently +broken AFAIK, although it's not a target I can exercise. 
+The following demonstrates the problem: +extern void bar(char *p); +void foo() { + char x[100000]; + bar(x); + __asm__("" ::: "cr2"); +} diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 9a2ce6b..f6753a6 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -56,6 +56,9 @@ namespace { unsigned AsmVariant, const char *ExtraCode); bool printGetPCX(const MachineInstr *MI, unsigned OpNo); + + virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const; }; } // end of anonymous namespace @@ -140,18 +143,19 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) { break; } + unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber(); unsigned bbNum = MI->getParent()->getNumber(); - O << '\n' << ".LLGETPCH" << bbNum << ":\n"; - O << "\tcall\t.LLGETPC" << bbNum << '\n' ; + O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n"; + O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ; O << "\t sethi\t" - << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), " + << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n' ; - O << ".LLGETPC" << bbNum << ":\n" ; + O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ; O << "\tor\t" << operand - << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), " + << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), " << operand << '\n'; O << "\tadd\t" << operand << ", %o7, " << operand << '\n'; @@ -197,6 +201,39 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +/// +/// This overrides AsmPrinter's implementation to handle delay slots. +bool SparcAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + const MachineBasicBlock *Pred = *PI; + + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // Check if the last terminator is an unconditional branch. + MachineBasicBlock::const_iterator I = Pred->end(); + while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + ; // Noop + return I == Pred->end() || !I->getDesc().isBarrier(); +} + + + // Force static initialization. extern "C" void LLVMInitializeSparcAsmPrinter() { RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget); diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt index cc24abf..b4991fe 100644 --- a/lib/Target/Sparc/README.txt +++ b/lib/Target/Sparc/README.txt @@ -56,3 +56,4 @@ int %t1(int %a, int %b) { leaf fns. 
* Fill delay slots +* Implement JIT support diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index e1b3299..a7d1805 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -35,7 +35,6 @@ class SparcDAGToDAGISel : public SelectionDAGISel { /// make the right decision when generating code for different targets. const SparcSubtarget &Subtarget; SparcTargetMachine& TM; - MachineBasicBlock *CurBB; public: explicit SparcDAGToDAGISel(SparcTargetMachine &tm) : SelectionDAGISel(tm), @@ -56,10 +55,6 @@ public: char ConstraintCode, std::vector<SDValue> &OutOps); - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "SPARC DAG->DAG Pattern Instruction Selection"; } @@ -72,17 +67,8 @@ private: }; } // end anonymous namespace -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void SparcDAGToDAGISel::InstructionSelect() { - CurBB = BB; - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - CurDAG->RemoveDeadNodes(); -} - SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = CurBB->getParent(); + MachineFunction *MF = BB->getParent(); unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index e67002a..4e93ef0 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -134,7 +134,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { - Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } else { ISD::LoadExtType LoadOp = ISD::SEXTLOAD; @@ -143,7 +144,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, - NULL, 0, ObjectVT); + NULL, 0, ObjectVT, false, false, 0); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } InVals.push_back(Load); @@ -167,7 +168,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0); + SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); InVals.push_back(Load); } ArgOffset += 4; @@ -189,7 +191,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + HiVal = DAG.getLoad(MVT::i32, dl, Chain, 
FIPtr, NULL, 0, + false, false, 0); } SDValue LoVal; @@ -201,7 +204,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); + LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, + false, false, 0); } // Compose the two halves together into an i64 unit. @@ -235,7 +239,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); - OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); + OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, + false, false, 0)); ArgOffset += 4; } @@ -339,7 +344,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // FIXME: VERIFY THAT 68 IS RIGHT. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68); PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); + MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0, + false, false, 0)); } #else @@ -385,14 +391,17 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // out the parts as integers. Top part goes in a reg. SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Val, StackPtr, NULL, 0); + Val, StackPtr, NULL, 0, + false, false, 0); // Sparc is big-endian, so the high part comes first. - SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); // Increment the pointer to the other half. StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(4)); // Load the low part. 
- SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, 0); + SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0, + false, false, 0); RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi)); @@ -435,7 +444,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, - PtrOff, NULL, 0)); + PtrOff, NULL, 0, + false, false, 0)); } ArgOffset += ObjSize; } @@ -759,7 +769,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, @@ -780,7 +790,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, NULL, 0); + AbsAddr, NULL, 0, false, false, 0); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -872,7 +882,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, DAG.getConstant(TLI.getVarArgsFrameOffset(), MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, + false, false, 0); } static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { @@ -882,21 +893,23 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); DebugLoc dl = Node->getDebugLoc(); - SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0); + SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0, + false, false, 0); // Increment the pointer, VAList, to the next vaarg SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, DAG.getConstant(VT.getSizeInBits()/8, MVT::i32)); // Store the incremented VAList to the legalized pointer InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr, - VAListPtr, SV, 0); + VAListPtr, SV, 0, false, false, 0); // Load the actual argument out of the pointer VAList, unless this is an // f64 load. if (VT != MVT::f64) - return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0); + return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0); // Otherwise, load it as i64, then do a bitconvert. - SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0); + SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0, + false, false, 0); // Bit-Convert the value to f64. 
SDValue Ops[2] = { diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index e457235..56d8708 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -22,7 +22,7 @@ namespace llvm { unsigned GlobalBaseReg; public: SparcMachineFunctionInfo() : GlobalBaseReg(0) {} - SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} + explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f6f632d..8152e1d 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -100,8 +100,6 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "SystemZ DAG->DAG Pattern Instruction Selection"; } @@ -152,10 +150,6 @@ namespace { bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM); bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM, bool is12Bit); - - #ifndef NDEBUG - unsigned Indent; - #endif }; } // end anonymous namespace @@ -594,41 +588,22 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) + IsLegalToFold(N, P, P)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } -/// InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void SystemZDAGToDAGISel::InstructionSelect() { - // Codegen the basic block. - DEBUG(errs() << "===== Instruction selection begins:\n"); - DEBUG(Indent = 0); - SelectRoot(*CurDAG); - DEBUG(errs() << "===== Instruction selection ends:\n"); - - CurDAG->RemoveDeadNodes(); -} - SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { EVT NVT = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); unsigned Opcode = Node->getOpcode(); // Dump information about the Node being selected - DEBUG(errs().indent(Indent) << "Selecting: "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent += 2); + DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); // If we have a custom node, we already have selected! if (Node->isMachineOpcode()) { - DEBUG(errs().indent(Indent-2) << "== "; - Node->dump(CurDAG); - errs() << "\n"); - DEBUG(Indent -= 2); + DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); return NULL; // Already selected. } @@ -694,9 +669,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { MVT::i32)); ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. 
@@ -709,15 +682,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { MVT::i32)); ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } case ISD::UDIVREM: { @@ -783,9 +750,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } // Copy the remainder (even subreg) result, if it is needed. @@ -797,15 +762,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); - DEBUG(errs().indent(Indent-2) << "=> "; - Result->dump(CurDAG); - errs() << "\n"); + DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } } @@ -813,14 +772,12 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Select the default instruction SDNode *ResNode = SelectCode(Node); - DEBUG(errs().indent(Indent-2) << "=> "; + DEBUG(errs() << "=> "; if (ResNode == NULL || ResNode == Node) Node->dump(CurDAG); else ResNode->dump(CurDAG); errs() << "\n"; ); - DEBUG(Indent -= 2); - return ResNode; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index f7405a5..6f4b30f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -30,9 +30,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -337,7 +337,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // from this parameter SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } // If this is an 8/16/32-bit value, it is really passed promoted to 64 @@ -435,7 +436,8 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, DAG.getIntPtrConstant(Offset)); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), Offset)); + PseudoSourceValue::getStack(), Offset, + false, false, 0)); } } @@ -738,7 +740,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, if (ExtraLoadRequired) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. 
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 336e20e..f46840c 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -58,7 +58,7 @@ def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), []>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src), "le\t{$dst, $src}", [(set FP32:$dst, (load rriaddr12:$src))]>; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 1891bba..a44f6d9 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -257,7 +257,7 @@ def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src), [(set GR64:$dst, i64hi32:$src)]>; } -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV32rm : RXI<0x58, (outs GR32:$dst), (ins rriaddr12:$src), "l\t{$dst, $src}", diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index e47d419..fd6e330 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -33,7 +33,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { public: SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {} - SystemZMachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {} + explicit SystemZMachineFunctionInfo(MachineFunction &MF) + : CalleeSavedFrameSize(0) {} unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 295b30f..9a16808 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -580,7 +580,7 @@ const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const { uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, unsigned NumIndices) const { const Type *Ty = ptrTy; - assert(isa<PointerType>(Ty) && "Illegal argument for getIndexedOffset()"); + assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()"); uint64_t Result = 0; generic_gep_type_iterator<Value* const*> diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 094a57e..18b0fa4 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include <ctype.h> using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a231ebc..82619c7 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -19,17 +19,15 @@ #include "llvm/GlobalVariable.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include 
"llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -289,832 +287,54 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const { } /// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a -/// pc-relative reference to the specified global variable from exception -/// handling information. In addition to the symbol, this returns -/// by-reference: -/// -/// IsIndirect - True if the returned symbol is actually a stub that contains -/// the address of the symbol, false if the symbol is the global itself. -/// -/// IsPCRel - True if the symbol reference is already pc-relative, false if -/// the caller needs to subtract off the address of the reference from the -/// symbol. -/// +/// reference to the specified global variable from exception +/// handling information. const MCExpr *TargetLoweringObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The generic implementation of this just returns a direct reference to the - // symbol. - IsIndirect = false; - IsPCRel = false; - + MachineModuleInfo *MMI, unsigned Encoding) const { // FIXME: Use GetGlobalValueSymbol. SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - return MCSymbolRefExpr::Create(Name.str(), getContext()); -} - - -//===----------------------------------------------------------------------===// -// ELF -//===----------------------------------------------------------------------===// -typedef StringMap<const MCSectionELF*> ELFUniqueMapTy; - -TargetLoweringObjectFileELF::~TargetLoweringObjectFileELF() { - // If we have the section uniquing map, free it. - delete (ELFUniqueMapTy*)UniquingMap; -} - -const MCSection *TargetLoweringObjectFileELF:: -getELFSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind, bool IsExplicit) const { - if (UniquingMap == 0) - UniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. 
- const MCSectionELF *&Entry = Map[Section]; - if (Entry) return Entry; - - return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit, - getContext()); -} - -void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((ELFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - BSSSection = - getELFSection(".bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); - - TextSection = - getELFSection(".text", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_EXECINSTR | MCSectionELF::SHF_ALLOC, - SectionKind::getText()); - - DataSection = - getELFSection(".data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); - - ReadOnlySection = - getELFSection(".rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, - SectionKind::getReadOnly()); - - TLSDataSection = - getELFSection(".tdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadData()); - - TLSBSSSection = - getELFSection(".tbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS | - MCSectionELF::SHF_WRITE, SectionKind::getThreadBSS()); - - DataRelSection = - getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - DataRelLocalSection = - getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRelLocal()); - - DataRelROSection = - getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRel()); - - DataRelROLocalSection = - getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); - - MergeableConst4Section = - getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst4()); - - MergeableConst8Section = - getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst8()); - - MergeableConst16Section = - getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE, - SectionKind::getMergeableConst16()); - - StaticCtorSection = - getELFSection(".ctors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - StaticDtorSection = - getELFSection(".dtors", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Exception Handling Sections. - - // FIXME: We're emitting LSDA info into a readonly section on ELF, even though - // it contains relocatable pointers. In PIC mode, this is probably a big - // runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC, SectionKind::getReadOnly()); - EHFrameSection = - getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE, - SectionKind::getDataRel()); - - // Debug Info Sections. 
- DwarfAbbrevSection = - getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfInfoSection = - getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLineSection = - getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfFrameSection = - getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfStrSection = - getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfLocSection = - getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfARangesSection = - getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfRangesSection = - getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); -} - - -static SectionKind -getELFKindForNamedSection(StringRef Name, SectionKind K) { - if (Name.empty() || Name[0] != '.') return K; - - // Some lame default implementation based on some magic section names. - if (Name == ".bss" || - Name.startswith(".bss.") || - Name.startswith(".gnu.linkonce.b.") || - Name.startswith(".llvm.linkonce.b.") || - Name == ".sbss" || - Name.startswith(".sbss.") || - Name.startswith(".gnu.linkonce.sb.") || - Name.startswith(".llvm.linkonce.sb.")) - return SectionKind::getBSS(); - - if (Name == ".tdata" || - Name.startswith(".tdata.") || - Name.startswith(".gnu.linkonce.td.") || - Name.startswith(".llvm.linkonce.td.")) - return SectionKind::getThreadData(); - - if (Name == ".tbss" || - Name.startswith(".tbss.") || - Name.startswith(".gnu.linkonce.tb.") || - Name.startswith(".llvm.linkonce.tb.")) - return SectionKind::getThreadBSS(); - - return K; -} - - -static unsigned getELFSectionType(StringRef Name, SectionKind K) { - - if (Name == ".init_array") - return MCSectionELF::SHT_INIT_ARRAY; - - if (Name == ".fini_array") - return MCSectionELF::SHT_FINI_ARRAY; + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - if (Name == ".preinit_array") - return MCSectionELF::SHT_PREINIT_ARRAY; - - if (K.isBSS() || K.isThreadBSS()) - return MCSectionELF::SHT_NOBITS; - - return MCSectionELF::SHT_PROGBITS; -} - - -static unsigned -getELFSectionFlags(SectionKind K) { - unsigned Flags = 0; - - if (!K.isMetadata()) - Flags |= MCSectionELF::SHF_ALLOC; - - if (K.isText()) - Flags |= MCSectionELF::SHF_EXECINSTR; - - if (K.isWriteable()) - Flags |= MCSectionELF::SHF_WRITE; - - if (K.isThreadLocal()) - Flags |= MCSectionELF::SHF_TLS; - - // K.isMergeableConst() is left out to honour PR4650 - if (K.isMergeableCString() || K.isMergeableConst4() || - K.isMergeableConst8() || K.isMergeableConst16()) - Flags |= MCSectionELF::SHF_MERGE; - - if (K.isMergeableCString()) - Flags |= MCSectionELF::SHF_STRINGS; - - return Flags; -} - - -const MCSection *TargetLoweringObjectFileELF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - StringRef SectionName = GV->getSection(); - - // Infer section flags from the section name if we can. 
- Kind = getELFKindForNamedSection(SectionName, Kind); - - return getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind, true); -} - -static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) return ".gnu.linkonce.t."; - if (Kind.isReadOnly()) return ".gnu.linkonce.r."; - - if (Kind.isThreadData()) return ".gnu.linkonce.td."; - if (Kind.isThreadBSS()) return ".gnu.linkonce.tb."; - - if (Kind.isDataNoRel()) return ".gnu.linkonce.d."; - if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local."; - if (Kind.isDataRel()) return ".gnu.linkonce.d.rel."; - if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local."; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".gnu.linkonce.d.rel.ro."; + return getSymbolForDwarfReference(Sym, MMI, Encoding); } -const MCSection *TargetLoweringObjectFileELF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. - if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) { - const char *Prefix = getSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name; - Name.append(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind), - getELFSectionFlags(Kind), Kind); - } - - if (Kind.isText()) return TextSection; - - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString() || - Kind.isMergeable4ByteCString()) { - - // We also need alignment here. - // FIXME: this is getting the alignment of the character, not the - // alignment of the global! - unsigned Align = - TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)); - - const char *SizeSpec = ".rodata.str1."; - if (Kind.isMergeable2ByteCString()) - SizeSpec = ".rodata.str2."; - else if (Kind.isMergeable4ByteCString()) - SizeSpec = ".rodata.str4."; - else - assert(Kind.isMergeable1ByteCString() && "unknown string width"); - - - std::string Name = SizeSpec + utostr(Align); - return getELFSection(Name, MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionELF::SHF_MERGE | - MCSectionELF::SHF_STRINGS, - Kind); - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - return ReadOnlySection; // .const - } - - if (Kind.isReadOnly()) return ReadOnlySection; - - if (Kind.isThreadData()) return TLSDataSection; - if (Kind.isThreadBSS()) return TLSBSSSection; - - // Note: we claim that common symbols are put in BSSSection, but they are - // really emitted with the magic .comm directive, which creates a symbol table - // entry but not a section. 
- if (Kind.isBSS() || Kind.isCommon()) return BSSSection; - - if (Kind.isDataNoRel()) return DataSection; - if (Kind.isDataRelLocal()) return DataRelLocalSection; - if (Kind.isDataRel()) return DataRelSection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -/// getSectionForConstant - Given a mergeable constant with the -/// specified size and relocation information, return a section that it -/// should be placed in. -const MCSection *TargetLoweringObjectFileELF:: -getSectionForConstant(SectionKind Kind) const { - if (Kind.isMergeableConst4() && MergeableConst4Section) - return MergeableConst4Section; - if (Kind.isMergeableConst8() && MergeableConst8Section) - return MergeableConst8Section; - if (Kind.isMergeableConst16() && MergeableConst16Section) - return MergeableConst16Section; - if (Kind.isReadOnly()) - return ReadOnlySection; - - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return DataRelROSection; -} - -//===----------------------------------------------------------------------===// -// MachO -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy; - -TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)UniquingMap; -} - - -const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(StringRef Segment, StringRef Section, - unsigned TypeAndAttributes, - unsigned Reserved2, SectionKind Kind) const { - // We unique sections by their segment/section pair. The returned section - // may not have the same flags as the requested section, if so this should be - // diagnosed by the client as an error. - - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)UniquingMap; - - // Form the name to look up. - SmallString<64> Name; - Name += Segment; - Name.push_back(','); - Name += Section; - - // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; - if (Entry) return Entry; - - // Otherwise, return a new section. 
- return Entry = MCSectionMachO::Create(Segment, Section, TypeAndAttributes, - Reserved2, Kind, getContext()); -} - - -void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((MachOUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - - TextSection // .text - = getMachOSection("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - DataSection // .data - = getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); - - CStringSection // .cstring - = getMachOSection("__TEXT", "__cstring", MCSectionMachO::S_CSTRING_LITERALS, - SectionKind::getMergeable1ByteCString()); - UStringSection - = getMachOSection("__TEXT","__ustring", 0, - SectionKind::getMergeable2ByteCString()); - FourByteConstantSection // .literal4 - = getMachOSection("__TEXT", "__literal4", MCSectionMachO::S_4BYTE_LITERALS, - SectionKind::getMergeableConst4()); - EightByteConstantSection // .literal8 - = getMachOSection("__TEXT", "__literal8", MCSectionMachO::S_8BYTE_LITERALS, - SectionKind::getMergeableConst8()); - - // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back - // to using it in -static mode. - SixteenByteConstantSection = 0; - if (TM.getRelocationModel() != Reloc::Static && - TM.getTargetData()->getPointerSize() == 32) - SixteenByteConstantSection = // .literal16 - getMachOSection("__TEXT", "__literal16",MCSectionMachO::S_16BYTE_LITERALS, - SectionKind::getMergeableConst16()); - - ReadOnlySection // .const - = getMachOSection("__TEXT", "__const", 0, SectionKind::getReadOnly()); - - TextCoalSection - = getMachOSection("__TEXT", "__textcoal_nt", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - SectionKind::getText()); - ConstTextCoalSection - = getMachOSection("__TEXT", "__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataCoalSection - = getMachOSection("__DATA","__const_coal", MCSectionMachO::S_COALESCED, - SectionKind::getText()); - ConstDataSection // .const_data - = getMachOSection("__DATA", "__const", 0, - SectionKind::getReadOnlyWithRel()); - DataCoalSection - = getMachOSection("__DATA","__datacoal_nt", MCSectionMachO::S_COALESCED, - SectionKind::getDataRel()); - DataCommonSection - = getMachOSection("__DATA","__common", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - DataBSSSection - = getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, - SectionKind::getBSS()); - - - LazySymbolPointerSection - = getMachOSection("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - NonLazySymbolPointerSection - = getMachOSection("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); - - if (TM.getRelocationModel() == Reloc::Static) { - StaticCtorSection - = getMachOSection("__TEXT", "__constructor", 0,SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__TEXT", "__destructor", 0, SectionKind::getDataRel()); - } else { - StaticCtorSection - = getMachOSection("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - SectionKind::getDataRel()); - StaticDtorSection - = getMachOSection("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - SectionKind::getDataRel()); - } - - // Exception Handling. 
- LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, - SectionKind::getDataRel()); - EHFrameSection = - getMachOSection("__TEXT", "__eh_frame", - MCSectionMachO::S_COALESCED | - MCSectionMachO::S_ATTR_NO_TOC | - MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | - MCSectionMachO::S_ATTR_LIVE_SUPPORT, - SectionKind::getReadOnly()); - - // Debug Information. - DwarfAbbrevSection = - getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfInfoSection = - getMachOSection("__DWARF", "__debug_info", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLineSection = - getMachOSection("__DWARF", "__debug_line", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfFrameSection = - getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubNamesSection = - getMachOSection("__DWARF", "__debug_pubnames", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfPubTypesSection = - getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfStrSection = - getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfLocSection = - getMachOSection("__DWARF", "__debug_loc", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfARangesSection = - getMachOSection("__DWARF", "__debug_aranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfRangesSection = - getMachOSection("__DWARF", "__debug_ranges", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfMacroInfoSection = - getMachOSection("__DWARF", "__debug_macinfo", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); - DwarfDebugInlineSection = - getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileMachO:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - // Parse the section specifier and create it if valid. - StringRef Segment, Section; - unsigned TAA, StubSize; - std::string ErrorCode = - MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, - TAA, StubSize); - if (!ErrorCode.empty()) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' has an invalid section specifier '" + GV->getSection()+ - "': " + ErrorCode + "."); - // Fall back to dropping it into the data section. - return DataSection; - } - - // Get the section. - const MCSectionMachO *S = - getMachOSection(Segment, Section, TAA, StubSize, Kind); - - // Okay, now that we got the section, verify that the TAA & StubSize agree. - // If the user declared multiple globals with different section flags, we need - // to reject it here. - if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { - // If invalid, report the error with llvm_report_error. - llvm_report_error("Global variable '" + GV->getNameStr() + - "' section type or attributes does not match previous" - " section specifier"); - } - - return S; -} - -const MCSection *TargetLoweringObjectFileMachO:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS"); - - if (Kind.isText()) - return GV->isWeakForLinker() ? 
TextCoalSection : TextSection; - - // If this is weak/linkonce, put this in a coalescable section, either in text - // or data depending on if it is writable. - if (GV->isWeakForLinker()) { - if (Kind.isReadOnly()) - return ConstTextCoalSection; - return DataCoalSection; - } - - // FIXME: Alignment check should be handled by section classifier. - if (Kind.isMergeable1ByteCString() || - Kind.isMergeable2ByteCString()) { - if (TM.getTargetData()->getPreferredAlignment( - cast<GlobalVariable>(GV)) < 32) { - if (Kind.isMergeable1ByteCString()) - return CStringSection; - assert(Kind.isMergeable2ByteCString()); - return UStringSection; - } - } - - if (Kind.isMergeableConst()) { - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - } - - // Otherwise, if it is readonly, but not something we can specially optimize, - // just drop it in .const. - if (Kind.isReadOnly()) - return ReadOnlySection; - - // If this is marked const, put it into a const section. But if the dynamic - // linker needs to write to it, put it in the data segment. - if (Kind.isReadOnlyWithRel()) - return ConstDataSection; - - // Put zero initialized globals with strong external linkage in the - // DATA, __common section with the .zerofill directive. - if (Kind.isBSSExtern()) - return DataCommonSection; - - // Put zero initialized globals with local linkage in __DATA,__bss directive - // with the .zerofill directive (aka .lcomm). - if (Kind.isBSSLocal()) - return DataBSSSection; - - // Otherwise, just drop the variable in the normal data section. - return DataSection; -} - -const MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const { - // If this constant requires a relocation, we have to put it in the data - // segment, not in the text segment. - if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) - return ConstDataSection; - - if (Kind.isMergeableConst4()) - return FourByteConstantSection; - if (Kind.isMergeableConst8()) - return EightByteConstantSection; - if (Kind.isMergeableConst16() && SixteenByteConstantSection) - return SixteenByteConstantSection; - return ReadOnlySection; // .const -} - -/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide -/// not to emit the UsedDirective for some symbols in llvm.used. -// FIXME: REMOVE this (rdar://7071300) -bool TargetLoweringObjectFileMachO:: -shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { - /// On Darwin, internally linked data beginning with "L" or "l" does not have - /// the directive emitted (this occurs in ObjC metadata). - if (!GV) return false; - - // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix. - if (GV->hasLocalLinkage() && !isa<Function>(GV)) { - // FIXME: ObjC metadata is currently emitted as internal symbols that have - // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and - // this horrible hack can go away. 
- SmallString<64> Name; - Mang->getNameWithPrefix(Name, GV, false); - if (Name[0] == 'L' || Name[0] == 'l') - return false; +const MCExpr *TargetLoweringObjectFile:: +getSymbolForDwarfReference(const MCSymbol *Sym, MachineModuleInfo *MMI, + unsigned Encoding) const { + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext()); + + switch (Encoding & 0xF0) { + default: + llvm_report_error("We do not support this DWARF encoding yet!"); + break; + case dwarf::DW_EH_PE_absptr: + // Do nothing special + break; + case dwarf::DW_EH_PE_pcrel: + // FIXME: PCSymbol + const MCExpr *PC = MCSymbolRefExpr::Create(".", getContext()); + Res = MCBinaryExpr::CreateSub(Res, PC, getContext()); + break; } - return true; + return Res; } -const MCExpr *TargetLoweringObjectFileMachO:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - return MCSymbolRefExpr::Create(Name.str(), getContext()); +unsigned TargetLoweringObjectFile::getPersonalityEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -//===----------------------------------------------------------------------===// -// COFF -//===----------------------------------------------------------------------===// - -typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy; - -TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() { - delete (COFFUniqueMapTy*)UniquingMap; +unsigned TargetLoweringObjectFile::getLSDAEncoding() const { + return dwarf::DW_EH_PE_absptr; } - -const MCSection *TargetLoweringObjectFileCOFF:: -getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const { - // Create the map if it doesn't already exist. - if (UniquingMap == 0) - UniquingMap = new MachOUniqueMapTy(); - COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap; - - // Do the lookup, if we have a hit, return it. - const MCSectionCOFF *&Entry = Map[Name]; - if (Entry) return Entry; - - return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext()); +unsigned TargetLoweringObjectFile::getFDEEncoding() const { + return dwarf::DW_EH_PE_absptr; } -void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - if (UniquingMap != 0) - ((COFFUniqueMapTy*)UniquingMap)->clear(); - TargetLoweringObjectFile::Initialize(Ctx, TM); - TextSection = getCOFFSection("\t.text", true, SectionKind::getText()); - DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel()); - StaticCtorSection = - getCOFFSection(".ctors", false, SectionKind::getDataRel()); - StaticDtorSection = - getCOFFSection(".dtors", false, SectionKind::getDataRel()); - - // FIXME: We're emitting LSDA info into a readonly section on COFF, even - // though it contains relocatable pointers. In PIC mode, this is probably a - // big runtime hit for C++ apps. Either the contents of the LSDA need to be - // adjusted or this should be a data section. - LSDASection = - getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly()); - EHFrameSection = - getCOFFSection(".eh_frame", false, SectionKind::getDataRel()); - - // Debug info. - // FIXME: Don't use 'directive' mode here. 
- DwarfAbbrevSection = - getCOFFSection("\t.section\t.debug_abbrev,\"dr\"", - true, SectionKind::getMetadata()); - DwarfInfoSection = - getCOFFSection("\t.section\t.debug_info,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLineSection = - getCOFFSection("\t.section\t.debug_line,\"dr\"", - true, SectionKind::getMetadata()); - DwarfFrameSection = - getCOFFSection("\t.section\t.debug_frame,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubNamesSection = - getCOFFSection("\t.section\t.debug_pubnames,\"dr\"", - true, SectionKind::getMetadata()); - DwarfPubTypesSection = - getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"", - true, SectionKind::getMetadata()); - DwarfStrSection = - getCOFFSection("\t.section\t.debug_str,\"dr\"", - true, SectionKind::getMetadata()); - DwarfLocSection = - getCOFFSection("\t.section\t.debug_loc,\"dr\"", - true, SectionKind::getMetadata()); - DwarfARangesSection = - getCOFFSection("\t.section\t.debug_aranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfRangesSection = - getCOFFSection("\t.section\t.debug_ranges,\"dr\"", - true, SectionKind::getMetadata()); - DwarfMacroInfoSection = - getCOFFSection("\t.section\t.debug_macinfo,\"dr\"", - true, SectionKind::getMetadata()); -} - -const MCSection *TargetLoweringObjectFileCOFF:: -getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - return getCOFFSection(GV->getSection(), false, Kind); -} - -static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { - if (Kind.isText()) - return ".text$linkonce"; - if (Kind.isWriteable()) - return ".data$linkonce"; - return ".rdata$linkonce"; -} - - -const MCSection *TargetLoweringObjectFileCOFF:: -SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, const TargetMachine &TM) const { - assert(!Kind.isThreadLocal() && "Doesn't support TLS"); - - // If this global is linkonce/weak and the target handles this by emitting it - // into a 'uniqued' section name, create and return the section now. 
- if (GV->isWeakForLinker()) { - const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); - Mang->getNameWithPrefix(Name, GV, false); - return getCOFFSection(Name.str(), false, Kind); - } - - if (Kind.isText()) - return getTextSection(); - - return getDataSection(); +unsigned TargetLoweringObjectFile::getTTypeEncoding() const { + return dwarf::DW_EH_PE_absptr; } diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk new file mode 100644 index 0000000..f5b8180 --- /dev/null +++ b/lib/Target/X86/Android.mk @@ -0,0 +1,47 @@ +LOCAL_PATH := $(call my-dir) + +# For the host only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + X86GenRegisterInfo.h.inc \ + X86GenRegisterNames.inc \ + X86GenRegisterInfo.inc \ + X86GenInstrNames.inc \ + X86GenInstrInfo.inc \ + X86GenAsmMatcher.inc \ + X86GenDAGISel.inc \ + X86GenDisassemblerTables.inc \ + X86GenFastISel.inc \ + X86GenCallingConv.inc \ + X86GenSubtarget.inc \ + X86GenEDInfo.inc + +LOCAL_SRC_FILES := \ + X86AsmBackend.cpp \ + X86COFFMachineModuleInfo.cpp \ + X86CodeEmitter.cpp \ + X86ELFWriterInfo.cpp \ + X86FastISel.cpp \ + X86FloatingPoint.cpp \ + X86FloatingPointRegKill.cpp \ + X86ISelDAGToDAG.cpp \ + X86ISelLowering.cpp \ + X86InstrInfo.cpp \ + X86JITInfo.cpp \ + X86MCAsmInfo.cpp \ + X86MCCodeEmitter.cpp \ + X86MCTargetExpr.cpp \ + X86RegisterInfo.cpp \ + X86Subtarget.cpp \ + X86TargetMachine.cpp \ + X86TargetObjectFile.cpp + +LOCAL_MODULE:= libLLVMX86CodeGen + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index acf497a..84d7bb7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -10,6 +10,7 @@ #include "llvm/Target/TargetAsmParser.h" #include "X86.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" @@ -183,6 +184,14 @@ struct X86Operand : public MCParsedAsmOperand { bool isReg() const { return Kind == Register; } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); + } + void addRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getReg())); @@ -190,13 +199,13 @@ struct X86Operand : public MCParsedAsmOperand { void addImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addImmSExt8Operands(MCInst &Inst, unsigned N) const { // FIXME: Support user customization of the render method. 
assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); } void addMemOperands(MCInst &Inst, unsigned N) const { @@ -204,7 +213,7 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + addExpr(Inst, getMemDisp()); Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); } @@ -218,7 +227,7 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getMemBaseReg())); Inst.addOperand(MCOperand::CreateImm(getMemScale())); Inst.addOperand(MCOperand::CreateReg(getMemIndexReg())); - Inst.addOperand(MCOperand::CreateExpr(getMemDisp())); + addExpr(Inst, getMemDisp()); } static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { @@ -492,24 +501,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand() { bool X86ATTAsmParser:: ParseInstruction(const StringRef &Name, SMLoc NameLoc, SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - // FIXME: Hack to recognize "sal..." for now. We need a way to represent - // alternative syntaxes in the .td file, without requiring instruction - // duplication. - if (Name.startswith("sal")) { - std::string Tmp = "shl" + Name.substr(3).str(); - Operands.push_back(X86Operand::CreateToken(Tmp, NameLoc)); - } else { - // FIXME: This is a hack. We eventually want to add a general pattern - // mechanism to be used in the table gen file for these assembly names that - // use the same opcodes. Also we should only allow the "alternate names" - // for rep and repne with the instructions they can only appear with. - StringRef PatchedName = Name; - if (Name == "repe" || Name == "repz") - PatchedName = "rep"; - else if (Name == "repnz") - PatchedName = "repne"; - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); - } + // FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to + // represent alternative syntaxes in the .td file, without requiring + // instruction duplication. + StringRef PatchedName = StringSwitch<StringRef>(Name) + .Case("sal", "shl") + .Case("salb", "shlb") + .Case("sall", "shll") + .Case("salq", "shlq") + .Case("salw", "shlw") + .Case("repe", "rep") + .Case("repz", "rep") + .Case("repnz", "repne") + .Default(Name); + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); if (getLexer().isNot(AsmToken::EndOfStatement)) { diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 38ccbf9..734a545 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -25,10 +25,15 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. 
#define MachineInstr MCInst +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter.inc" #undef MachineInstr void X86ATTInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { @@ -68,7 +73,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) { O << '$' << Op.getImm(); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) - *CommentStream << format("imm = 0x%X\n", Op.getImm()); + *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 3180618..d109a07 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -26,11 +26,12 @@ public: virtual void printInst(const MCInst *MI); - + virtual StringRef getOpcodeName(unsigned Opcode) const; + // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); - + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo); void printMemReference(const MCInst *MI, unsigned Op); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 304306d..8cab24c 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -33,10 +33,11 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/Mangler.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/SmallString.h" @@ -52,40 +53,42 @@ void X86AsmPrinter::PrintPICBaseSymbol() const { OutContext); } +MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const { + SmallString<60> NameStr; + Mang->getNameWithPrefix(NameStr, GV, false); + MCSymbol *Symb = OutContext.GetOrCreateSymbol(NameStr.str()); + + if (Subtarget->isTargetCygMing()) { + X86COFFMachineModuleInfo &COFFMMI = + MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); + COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData()); + + // Save function name for later type emission. + if (const Function *F = dyn_cast<Function>(GV)) + if (F->isDeclaration()) + COFFMMI.addExternalFunction(Symb->getName()); + + } + + return Symb; +} + /// runOnMachineFunction - Emit the function body. /// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); - - // COFF and Cygwin specific mangling stuff. This should be moved out to the - // mangler or handled some other way? - if (Subtarget->isTargetCOFF()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - // Populate function information map. Don't want to populate - // non-stdcall or non-fastcall functions' information right now. 
- const Function *F = MF.getFunction(); - CallingConv::ID CC = F->getCallingConv(); - if (CC == CallingConv::X86_StdCall || CC == CallingConv::X86_FastCall) - COFFMMI.AddFunctionInfo(F, *MF.getInfo<X86MachineFunctionInfo>()); - } - if (Subtarget->isTargetCygMing()) { + if (Subtarget->isTargetCOFF()) { const Function *F = MF.getFunction(); - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(CurrentFnSym, OutContext,F,*TM.getTargetData()); - - O << "\t.def\t " << *CurrentFnSym; - O << ";\t.scl\t" << + O << "\t.def\t " << *CurrentFnSym << ";\t.scl\t" << (F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT) << ";\t.endef\n"; } - + // Have common code print out the function header with linkage info etc. EmitFunctionHeader(); - + // Emit the rest of the function body. EmitFunctionBody(); @@ -119,12 +122,6 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) { else GVSym = GetGlobalValueSymbol(GV); - if (Subtarget->isTargetCygMing()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo<X86COFFMachineModuleInfo>(); - COFFMMI.DecorateCygMingName(GVSym, OutContext, GV, *TM.getTargetData()); - } - // Handle dllimport linkage. if (MO.getTargetFlags() == X86II::MO_DLLIMPORT) GVSym = OutContext.GetOrCreateSymbol(Twine("__imp_") + GVSym->getName()); @@ -585,7 +582,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) { MCSymbol *Sym = GetGlobalValueSymbol(I); - COFFMMI.DecorateCygMingName(Sym, OutContext, I, *TM.getTargetData()); DLLExportedFns.push_back(Sym); } @@ -607,6 +603,28 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } } } + + if (Subtarget->isTargetELF()) { + TargetLoweringObjectFileELF &TLOFELF = + static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const TargetData *TD = TM.getTargetData(); + + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) + O << *Stubs[i].first << ":\n" + << (TD->getPointerSize() == 8 ? + MAI->getData64bitsDirective() : MAI->getData32bitsDirective()) + << *Stubs[i].second << '\n'; + + Stubs.clear(); + } + } } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index 1d32a5f..039214a 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -61,8 +61,7 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { virtual void EmitInstruction(const MachineInstr *MI); void printSymbolOperand(const MachineOperand &MO); - - + virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const; // These methods are used by the tablegen'erated instruction printer. void printOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 4274d0a..610beb5 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -24,10 +24,14 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. 
#define MachineInstr MCInst +#define GET_INSTRUCTION_NAME #include "X86GenAsmWriter1.inc" #undef MachineInstr void X86IntelInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index 1976177..545bf84 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -26,10 +26,12 @@ public: : MCInstPrinter(O, MAI) {} virtual void printInst(const MCInst *MI); + virtual StringRef getOpcodeName(unsigned Opcode) const; // Autogenerated by tblgen. void printInstruction(const MCInst *MI); static const char *getRegisterName(unsigned RegNo); + static const char *getInstructionName(unsigned Opcode); void printOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 61f26a7..eed3b45 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) set(sources + X86AsmBackend.cpp X86CodeEmitter.cpp X86COFFMachineModuleInfo.cpp X86ELFWriterInfo.cpp diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index 19eb05e..e5f84e8 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -67,8 +67,8 @@ no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit [ %tmp.34.i18, %no_exit.i7 ] %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] - %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00 - %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00 + %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00 + %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00 br i1 false, label %Compute_Tree.exit23, label %no_exit.i7 Compute_Tree.exit23: ; preds = %no_exit.i7 @@ -97,7 +97,7 @@ pcmp/pand/pandn/por to do a selection instead of a conditional branch: double %X(double %Y, double %Z, double %A, double %B) { %C = setlt double %A, %B - %z = add double %Z, 0.0 ;; select operand is not a load + %z = fadd double %Z, 0.0 ;; select operand is not a load %D = select bool %C, double %Y, double %z ret double %D } @@ -545,7 +545,7 @@ eliminates a constant pool load. For example, consider: define i64 @ccosf(float %z.0, float %z.1) nounwind readonly { entry: - %tmp6 = sub float -0.000000e+00, %z.1 ; <float> [#uses=1] + %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1] %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly ret i64 %tmp20 } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index aa7bb3d..d4545a6 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -227,11 +227,6 @@ lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor. //===---------------------------------------------------------------------===// -Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 / -FR64 to VR128. - -//===---------------------------------------------------------------------===// - Adding to the list of cmp / test poor codegen issues: int test(__m128 *A, __m128 *B) { @@ -1868,3 +1863,69 @@ carried over to machine instructions. 
Asm printer (or JIT) can use this information to add the "lock" prefix. //===---------------------------------------------------------------------===// + +_Bool bar(int *x) { return *x & 1; } + +define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { +entry: + %tmp1 = load i32* %x ; <i32> [#uses=1] + %and = and i32 %tmp1, 1 ; <i32> [#uses=1] + %tobool = icmp ne i32 %and, 0 ; <i1> [#uses=1] + ret i1 %tobool +} + +bar: # @bar +# BB#0: # %entry + movl 4(%esp), %eax + movb (%eax), %al + andb $1, %al + movzbl %al, %eax + ret + +Missed optimization: should be movl+andl. + +//===---------------------------------------------------------------------===// + +Consider the following two functions compiled with clang: +_Bool foo(int *x) { return !(*x & 4); } +unsigned bar(int *x) { return !(*x & 4); } + +foo: + movl 4(%esp), %eax + testb $4, (%eax) + sete %al + movzbl %al, %eax + ret + +bar: + movl 4(%esp), %eax + movl (%eax), %eax + shrl $2, %eax + andl $1, %eax + xorl $1, %eax + ret + +The second function generates more code even though the two functions +are functionally identical. + +//===---------------------------------------------------------------------===// + +Take the following C code: +int x(int y) { return (y & 63) << 14; } + +Code produced by gcc: + andl $63, %edi + sall $14, %edi + movl %edi, %eax + ret + +Code produced by clang: + shll $14, %edi + movl %edi, %eax + andl $1032192, %eax + ret + +The code produced by gcc is 3 bytes shorter. This sort of construct often +shows up with bitfields. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/TargetInfo/Android.mk b/lib/Target/X86/TargetInfo/Android.mk new file mode 100644 index 0000000..ee53f0d --- /dev/null +++ b/lib/Target/X86/TargetInfo/Android.mk @@ -0,0 +1,24 @@ +LOCAL_PATH := $(call my-dir) + +# For the device only +# ===================================================== +include $(CLEAR_VARS) +include $(CLEAR_TBLGEN_VARS) + +TBLGEN_TABLES := \ + X86GenRegisterNames.inc \ + X86GenInstrNames.inc + +TBLGEN_TD_DIR := $(LOCAL_PATH)/.. + +LOCAL_SRC_FILES := \ + X86TargetInfo.cpp + +LOCAL_C_INCLUDES += \ + $(LOCAL_PATH)/.. 
+ +LOCAL_MODULE:= libLLVMX86Info + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 1d17a05..ba0ee6c 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -19,12 +19,15 @@ namespace llvm { -class X86TargetMachine; class FunctionPass; -class MachineCodeEmitter; -class MCCodeEmitter; class JITCodeEmitter; +class MCAssembler; +class MCCodeEmitter; +class MCContext; +class MachineCodeEmitter; class Target; +class TargetAsmBackend; +class X86TargetMachine; class formatted_raw_ostream; /// createX86ISelDag - This pass converts a legalized DAG into a @@ -49,9 +52,13 @@ FunctionPass *createX87FPRegKillInserterPass(); FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM, JITCodeEmitter &JCE); -MCCodeEmitter *createHeinousX86MCCodeEmitter(const Target &, TargetMachine &TM); -MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM); -MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM); +MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); +MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM, + MCContext &Ctx); + +TargetAsmBackend *createX86_32AsmBackend(const Target &, MCAssembler &); +TargetAsmBackend *createX86_64AsmBackend(const Target &, MCAssembler &); /// createX86EmitCodeToMemory - Returns a pass that converts a register /// allocated function into raw machine code in a dynamically diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp new file mode 100644 index 0000000..e6654ef --- /dev/null +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -0,0 +1,34 @@ +//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmBackend.h" +#include "X86.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +namespace { + +class X86AsmBackend : public TargetAsmBackend { +public: + X86AsmBackend(const Target &T, MCAssembler &A) + : TargetAsmBackend(T) {} +}; + +} + +TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, + MCAssembler &A) { + return new X86AsmBackend(T, A); +} + +TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, + MCAssembler &A) { + return new X86AsmBackend(T, A); +} diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp index ea52795..ab67acb 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp @@ -27,90 +27,55 @@ X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) { X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { } -void X86COFFMachineModuleInfo::AddFunctionInfo(const Function *F, - const X86MachineFunctionInfo &Val) { - FunctionInfoMap[F] = Val; +void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) { + CygMingStubs.insert(Name); } - - -static X86MachineFunctionInfo calculateFunctionInfo(const Function *F, - const TargetData &TD) { - X86MachineFunctionInfo Info; - uint64_t Size = 0; - - switch (F->getCallingConv()) { - case CallingConv::X86_StdCall: - Info.setDecorationStyle(StdCall); - break; - case CallingConv::X86_FastCall: - Info.setDecorationStyle(FastCall); - break; - default: - return Info; - } - - unsigned argNum = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI, ++argNum) { - const Type* Ty = AI->getType(); - - // 'Dereference' type in case of byval parameter attribute - if (F->paramHasAttr(argNum, Attribute::ByVal)) - Ty = cast<PointerType>(Ty)->getElementType(); - - // Size should be aligned to DWORD boundary - Size += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; - } - - // We're not supporting tooooo huge arguments :) - Info.setBytesToPopOnReturn((unsigned int)Size); - return Info; -} - - -/// DecorateCygMingName - Query FunctionInfoMap and use this information for -/// various name decorations for Cygwin and MingW. +/// DecorateCygMingName - Apply various name decorations if the function uses +/// stdcall or fastcall calling convention. void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD) { const Function *F = dyn_cast<Function>(GV); if (!F) return; - - // Save function name for later type emission. 
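The new X86AsmBackend.cpp above only stubs out the TargetAsmBackend subclass and exposes the two factories declared in X86.h. A caller holding a TargetMachine could pick between them the same way the removed createHeinousX86MCCodeEmitter (further down in this diff) picks between the 32- and 64-bit MC code emitters; the helper below is a hypothetical sketch under that assumption, not code from the patch.

#include "X86.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Hypothetical helper: choose the asm backend by pointer size, mirroring the
// pointer-size test this patch uses for the MC code emitter factories.
static TargetAsmBackend *createX86AsmBackendFor(const Target &T,
                                                TargetMachine &TM,
                                                MCAssembler &A) {
  if (TM.getTargetData()->getPointerSize() == 4)
    return createX86_32AsmBackend(T, A);   // x86-32
  return createX86_64AsmBackend(T, A);     // x86-64
}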
- if (F->isDeclaration()) - CygMingStubs.insert(StringRef(Name.data(), Name.size())); - + // We don't want to decorate non-stdcall or non-fastcall functions right now CallingConv::ID CC = F->getCallingConv(); if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall) return; - - const X86MachineFunctionInfo *Info; - - FMFInfoMap::const_iterator info_item = FunctionInfoMap.find(F); - if (info_item == FunctionInfoMap.end()) { - // Calculate apropriate function info and populate map - FunctionInfoMap[F] = calculateFunctionInfo(F, TD); - Info = &FunctionInfoMap[F]; - } else { - Info = &info_item->second; - } - - if (Info->getDecorationStyle() == None) return; + + unsigned ArgWords = 0; + DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F); + if (item == FnArgWords.end()) { + // Calculate arguments sizes + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + const Type* Ty = AI->getType(); + + // 'Dereference' type in case of byval parameter attribute + if (AI->hasByValAttr()) + Ty = cast<PointerType>(Ty)->getElementType(); + + // Size should be aligned to DWORD boundary + ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4; + } + + FnArgWords[F] = ArgWords; + } else + ArgWords = item->second; + const FunctionType *FT = F->getFunctionType(); - // "Pure" variadic functions do not receive @0 suffix. if (!FT->isVarArg() || FT->getNumParams() == 0 || (FT->getNumParams() == 1 && F->hasStructRetAttr())) - raw_svector_ostream(Name) << '@' << Info->getBytesToPopOnReturn(); - - if (Info->getDecorationStyle() == FastCall) { + raw_svector_ostream(Name) << '@' << ArgWords; + + if (CC == CallingConv::X86_FastCall) { if (Name[0] == '_') Name[0] = '@'; else Name.insert(Name.begin(), '@'); - } + } } /// DecorateCygMingName - Query FunctionInfoMap and use this information for @@ -121,6 +86,6 @@ void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name, const TargetData &TD) { SmallString<128> NameStr(Name->getName().begin(), Name->getName().end()); DecorateCygMingName(NameStr, GV, TD); - + Name = Ctx.GetOrCreateSymbol(NameStr.str()); } diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h index 0e2009e..9de3dcd 100644 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ b/lib/Target/X86/X86COFFMachineModuleInfo.h @@ -21,44 +21,25 @@ namespace llvm { class X86MachineFunctionInfo; class TargetData; - + /// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation /// for X86 COFF targets. class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { StringSet<> CygMingStubs; - - // We have to propagate some information about MachineFunction to - // AsmPrinter. It's ok, when we're printing the function, since we have - // access to MachineFunction and can get the appropriate MachineFunctionInfo. - // Unfortunately, this is not possible when we're printing reference to - // Function (e.g. calling it and so on). Even more, there is no way to get the - // corresponding MachineFunctions: it can even be not created at all. That's - // why we should use additional structure, when we're collecting all necessary - // information. - // - // This structure is using e.g. for name decoration for stdcall & fastcall'ed - // function, since we have to use arguments' size for decoration. 
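For readers unfamiliar with the Win32 conventions DecorateCygMingName implements: stdcall and fastcall symbols carry an '@<bytes>' suffix giving the size of the argument area, rounded up to 4-byte units (which is what the new ArgWords computation above produces), and fastcall additionally replaces the leading '_' with '@'. The sketch below restates the rule with plain std::string; it is an illustration, not the patch's code, and it ignores the variadic-function exception the patch handles.

#include <string>

// Illustrative only: decorate a CygMing symbol for stdcall/fastcall.
// ArgBytes is the argument-area size rounded up to 4-byte units.
std::string decorateCygMing(std::string Name, unsigned ArgBytes, bool IsFastCall) {
  Name += '@';
  Name += std::to_string(ArgBytes);        // "_foo" -> "_foo@8"
  if (IsFastCall) {
    if (!Name.empty() && Name[0] == '_')
      Name[0] = '@';                       // "_foo@8" -> "@foo@8"
    else
      Name.insert(Name.begin(), '@');
  }
  return Name;
}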
- typedef std::map<const Function*, X86MachineFunctionInfo> FMFInfoMap; - FMFInfoMap FunctionInfoMap; - + DenseMap<const Function*, unsigned> FnArgWords; public: X86COFFMachineModuleInfo(const MachineModuleInfo &); ~X86COFFMachineModuleInfo(); - - + void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx, const GlobalValue *GV, const TargetData &TD); void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV, const TargetData &TD); - - void AddFunctionInfo(const Function *F, const X86MachineFunctionInfo &Val); - + void addExternalFunction(const StringRef& Name); typedef StringSet<>::const_iterator stub_iterator; stub_iterator stub_begin() const { return CygMingStubs.begin(); } stub_iterator stub_end() const { return CygMingStubs.end(); } - - }; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f0bceb1..8deadf6 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -387,10 +387,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. + unsigned BaseRegNo = -1U; + if (BaseReg != 0 && BaseReg != X86::RIP) + BaseRegNo = getX86RegNum(BaseReg); + if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - // The SIB byte must be used if the base is ESP/RSP. - BaseReg != X86::ESP && BaseReg != X86::RSP && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). (!Is64BitMode || BaseReg != 0)) { @@ -401,7 +407,6 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, return; } - unsigned BaseRegNo = getX86RegNum(BaseReg); // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -757,27 +762,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: { MCE.emitByte(BaseOpcode); - - // Special handling of lfence, mfence, monitor, and mwait. 
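The rewritten check in emitMemModRMByte above leans on an encoding fact worth spelling out: in the ModRM byte, an R/M field of 4 does not name a register at all, it announces that a SIB byte follows, and ESP, RSP and R12 all encode to 4 in those low three bits. A hypothetical restatement of the test (not code from the patch):

// BaseRegNo is the value getX86RegNum would return for the base register;
// only its low three bits can be placed in the ModRM R/M field.
static bool needsSIBByte(bool HasIndexReg, unsigned BaseRegNo) {
  // An index register always forces a SIB byte; so does any base whose 3-bit
  // encoding is 4 (ESP/RSP/R12), because R/M == 4 is the "SIB follows" escape.
  return HasIndexReg || (BaseRegNo & 7) == 4;
}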
- if (Desc->getOpcode() == X86::LFENCE || - Desc->getOpcode() == X86::MFENCE || - Desc->getOpcode() == X86::MONITOR || - Desc->getOpcode() == X86::MWAIT) { - emitRegModRMByte((Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - - switch (Desc->getOpcode()) { - default: break; - case X86::MONITOR: - MCE.emitByte(0xC8); - break; - case X86::MWAIT: - MCE.emitByte(0xC9); - break; - } - } else { - emitRegModRMByte(MI.getOperand(CurOp++).getReg(), - (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); - } + emitRegModRMByte(MI.getOperand(CurOp++).getReg(), + (Desc->TSFlags & X86II::FormMask)-X86II::MRM0r); if (CurOp == NumOps) break; @@ -853,6 +839,27 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, getX86RegNum(MI.getOperand(CurOp).getReg())); ++CurOp; break; + + case X86II::MRM_C1: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC1); + break; + case X86II::MRM_C8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC8); + break; + case X86II::MRM_C9: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xC9); + break; + case X86II::MRM_E8: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xE8); + break; + case X86II::MRM_F0: + MCE.emitByte(BaseOpcode); + MCE.emitByte(0xF0); + break; } if (!Desc->isVariadic() && CurOp != NumOps) { @@ -864,335 +871,3 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, MCE.processDebugLoc(MI.getDebugLoc(), false); } - -// Adapt the Emitter / CodeEmitter interfaces to MCCodeEmitter. -// -// FIXME: This is a total hack designed to allow work on llvm-mc to proceed -// without being blocked on various cleanups needed to support a clean interface -// to instruction encoding. -// -// Look away! - -#include "llvm/DerivedTypes.h" - -namespace { -class MCSingleInstructionCodeEmitter : public MachineCodeEmitter { - uint8_t Data[256]; - const MCInst *CurrentInst; - SmallVectorImpl<MCFixup> *FixupList; - -public: - MCSingleInstructionCodeEmitter() { reset(0, 0); } - - void reset(const MCInst *Inst, SmallVectorImpl<MCFixup> *Fixups) { - CurrentInst = Inst; - FixupList = Fixups; - BufferBegin = Data; - BufferEnd = array_endof(Data); - CurBufferPtr = Data; - } - - StringRef str() { - return StringRef(reinterpret_cast<char*>(BufferBegin), - CurBufferPtr - BufferBegin); - } - - virtual void startFunction(MachineFunction &F) {} - virtual bool finishFunction(MachineFunction &F) { return false; } - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {} - virtual bool earlyResolveAddresses() const { return false; } - virtual void addRelocation(const MachineRelocation &MR) { - unsigned Offset = 0, OpIndex = 0, Kind = MR.getRelocationType(); - - // This form is only used in one case, for branches. - if (MR.isBasicBlock()) { - Offset = unsigned(MR.getMachineCodeOffset()); - OpIndex = 0; - } else { - assert(MR.isJumpTableIndex() && "Unexpected relocation!"); - - Offset = unsigned(MR.getMachineCodeOffset()); - - // The operand index is encoded as the first byte of the fake operand. - OpIndex = MR.getJumpTableIndex(); - } - - MCOperand Op = CurrentInst->getOperand(OpIndex); - assert(Op.isExpr() && "FIXME: Not yet implemented!"); - FixupList->push_back(MCFixup::Create(Offset, Op.getExpr(), - MCFixupKind(FirstTargetFixupKind + Kind))); - } - virtual void setModuleInfo(MachineModuleInfo* Info) {} - - // Interface functions which should never get called in our usage. 
- - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "Unexpected code emitter call!"); - } - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } - virtual uintptr_t getLabelAddress(uint64_t LabelID) const { - assert(0 && "Unexpected code emitter call!"); - return 0; - } -}; - -class X86MCCodeEmitter : public MCCodeEmitter { - X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT - -private: - X86TargetMachine &TM; - llvm::Function *DummyF; - TargetData *DummyTD; - mutable llvm::MachineFunction *DummyMF; - llvm::MachineBasicBlock *DummyMBB; - - MCSingleInstructionCodeEmitter *InstrEmitter; - Emitter<MachineCodeEmitter> *Emit; - -public: - X86MCCodeEmitter(X86TargetMachine &_TM) : TM(_TM) { - // Verily, thou shouldst avert thine eyes. - const llvm::FunctionType *FTy = - FunctionType::get(llvm::Type::getVoidTy(getGlobalContext()), false); - DummyF = Function::Create(FTy, GlobalValue::InternalLinkage); - DummyTD = new TargetData(""); - DummyMF = new MachineFunction(DummyF, TM, 0); - DummyMBB = DummyMF->CreateMachineBasicBlock(); - - InstrEmitter = new MCSingleInstructionCodeEmitter(); - Emit = new Emitter<MachineCodeEmitter>(TM, *InstrEmitter, - *TM.getInstrInfo(), - *DummyTD, false); - } - ~X86MCCodeEmitter() { - delete Emit; - delete InstrEmitter; - delete DummyMF; - delete DummyF; - } - - unsigned getNumFixupKinds() const { - return 5; - } - - MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_word", 0, 4 * 8 }, - { "reloc_picrel_word", 0, 4 * 8 }, - { "reloc_absolute_word", 0, 4 * 8 }, - { "reloc_absolute_word_sext", 0, 4 * 8 }, - { "reloc_absolute_dword", 0, 8 * 8 } - }; - - assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind && - "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; - } - - bool AddRegToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (!Op.isReg()) return false; - - Instr->addOperand(MachineOperand::CreateReg(Op.getReg(), false)); - return true; - } - - bool AddImmToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - if (Start + 1 > MI.getNumOperands()) - return false; - - const MCOperand &Op = MI.getOperand(Start); - if (Op.isImm()) { - Instr->addOperand(MachineOperand::CreateImm(Op.getImm())); - return true; - } - if (!Op.isExpr()) - return false; - - const MCExpr *Expr = Op.getExpr(); - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) { - Instr->addOperand(MachineOperand::CreateImm(CE->getValue())); - return true; - } - - // Fake this as an external symbol to the code emitter to add a relcoation - // entry we will recognize. 
- Instr->addOperand(MachineOperand::CreateJTI(Start, 0)); - return true; - } - - bool AddLMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3)); - } - - bool AddMemToInstr(const MCInst &MI, MachineInstr *Instr, - unsigned Start) const { - return (AddRegToInstr(MI, Instr, Start + 0) && - AddImmToInstr(MI, Instr, Start + 1) && - AddRegToInstr(MI, Instr, Start + 2) && - AddImmToInstr(MI, Instr, Start + 3) && - AddRegToInstr(MI, Instr, Start + 4)); - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { - // Don't look yet! - - // Convert the MCInst to a MachineInstr so we can (ab)use the regular - // emitter. - const X86InstrInfo &II = *TM.getInstrInfo(); - const TargetInstrDesc &Desc = II.get(MI.getOpcode()); - MachineInstr *Instr = DummyMF->CreateMachineInstr(Desc, DebugLoc()); - DummyMBB->push_back(Instr); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - unsigned CurOp = 0; - bool AddTied = false; - if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) - AddTied = true; - else if (NumOps > 2 && - Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) - // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 - --NumOps; - - bool OK = true; - switch (Desc.TSFlags & X86II::FormMask) { - case X86II::MRMDestReg: - case X86II::MRMSrcReg: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::RawFrm: - if (CurOp < NumOps) { - // Hack to make branches work. - if (!(Desc.TSFlags & X86II::ImmMask) && - MI.getOperand(0).isExpr() && - isa<MCSymbolRefExpr>(MI.getOperand(0).getExpr())) - Instr->addOperand(MachineOperand::CreateMBB(DummyMBB)); - else - OK &= AddImmToInstr(MI, Instr, CurOp); - } - break; - - case X86II::AddRegFrm: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0r: case X86II::MRM1r: - case X86II::MRM2r: case X86II::MRM3r: - case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. - OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRM0m: case X86II::MRM1m: - case X86II::MRM2m: case X86II::MRM3m: - case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - if (CurOp < NumOps) - OK &= AddImmToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMSrcMem: - // Matching doesn't fill this in completely, we have to choose operand 0 - // for a tied register. 
- OK &= AddRegToInstr(MI, Instr, CurOp++); - if (AddTied) - OK &= AddRegToInstr(MI, Instr, CurOp++ - 1); - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - OK &= AddLMemToInstr(MI, Instr, CurOp); - else - OK &= AddMemToInstr(MI, Instr, CurOp); - break; - - case X86II::MRMDestMem: - OK &= AddMemToInstr(MI, Instr, CurOp); CurOp += 5; - OK &= AddRegToInstr(MI, Instr, CurOp); - break; - - default: - case X86II::MRMInitReg: - case X86II::Pseudo: - OK = false; - break; - } - - if (!OK) { - dbgs() << "couldn't convert inst '"; - MI.dump(); - dbgs() << "' to machine instr:\n"; - Instr->dump(); - } - - InstrEmitter->reset(&MI, &Fixups); - if (OK) - Emit->emitInstruction(*Instr, &Desc); - OS << InstrEmitter->str(); - - Instr->eraseFromParent(); - } -}; -} - -#include "llvm/Support/CommandLine.h" - -static cl::opt<bool> EnableNewEncoder("enable-new-x86-encoder", - cl::ReallyHidden); - - -// Ok, now you can look. -MCCodeEmitter *llvm::createHeinousX86MCCodeEmitter(const Target &T, - TargetMachine &TM) { - - // FIXME: Remove the heinous one when the new one works. - if (EnableNewEncoder) { - if (TM.getTargetData()->getPointerSize() == 4) - return createX86_32MCCodeEmitter(T, TM); - return createX86_64MCCodeEmitter(T, TM); - } - - return new X86MCCodeEmitter(static_cast<X86TargetMachine&>(TM)); -} diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index ea398e9..98e3f4e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -388,6 +388,8 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } case Instruction::GetElementPtr: { + X86AddressMode SavedAM = AM; + // Pattern-match simple GEPs. uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; @@ -428,7 +430,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM); + if (X86SelectAddress(U->getOperand(0), AM)) + return true; + + // If we couldn't merge the sub value into this addr mode, revert back to + // our address and just match the value instead of completely failing. + AM = SavedAM; + break; unsupported_gep: // Ok, the GEP indices weren't all covered. break; @@ -786,8 +794,8 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { bool X86FastISel::X86SelectZExt(Instruction *I) { // Handle zero-extension from i1 to i8, which is common. - if (I->getType()->isInteger(8) && - I->getOperand(0)->getType()->isInteger(1)) { + if (I->getType()->isIntegerTy(8) && + I->getOperand(0)->getType()->isIntegerTy(1)) { unsigned ResultReg = getRegForValue(I->getOperand(0)); if (ResultReg == 0) return false; // Set the high bits to zero. 
@@ -828,30 +836,30 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::FCMP_UNE; // FALL THROUGH - case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA; break; - case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE; break; - case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break; - case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP; break; - case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB; break; - case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE; break; - case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; + case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; + case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; + case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; + case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; + case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; + case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE; break; - case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE; break; - case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA; break; - case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break; - case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB; break; - case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break; - case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG; break; - case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break; - case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL; break; - case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break; + case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; + case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; + case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; + case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; + case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; + case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; + case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; + case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; + case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; + case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; default: return false; } @@ 
-869,7 +877,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); } FastEmitBranch(FalseMBB); @@ -923,7 +931,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB)) + BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? + X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); @@ -939,7 +948,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { if (OpReg == 0) return false; BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB); + BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); FastEmitBranch(FalseMBB); MBB->addSuccessor(TrueMBB); return true; @@ -948,7 +957,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { bool X86FastISel::X86SelectShift(Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; - if (I->getType()->isInteger(8)) { + if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { @@ -957,7 +966,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; default: return false; } - } else if (I->getType()->isInteger(16)) { + } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { @@ -966,7 +975,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; default: return false; } - } else if (I->getType()->isInteger(32)) { + } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { @@ -975,7 +984,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; default: return false; } - } else if (I->getType()->isInteger(64)) { + } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { @@ -1160,6 +1169,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { if (!X86SelectAddress(DI->getAddress(), AM)) return false; const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + // FIXME may need to add RegState::Debug to any registers produced, + // although ESP/EBP should be the only ones at the moment. addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). addMetadata(DI->getVariable()); return true; diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h new file mode 100644 index 0000000..c8dac3c --- /dev/null +++ b/lib/Target/X86/X86FixupKinds.h @@ -0,0 +1,25 @@ +//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_X86_X86FIXUPKINDS_H +#define LLVM_X86_X86FIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace X86 { +enum Fixups { + reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. + reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_riprel_4byte // 32-bit rip-relative +}; +} +} + +#endif diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 34a0045..6a117dd 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -118,7 +118,7 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { for (BasicBlock::const_iterator II = SI->begin(); (PN = dyn_cast<PHINode>(II)); ++II) { if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) || - (!Subtarget.hasSSE1() && PN->getType()->isFloatingPoint()) || + (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) || (!Subtarget.hasSSE2() && PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) { ContainsFPCode = true; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index e44ce421..3fad8ad 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,15 +12,6 @@ // //===----------------------------------------------------------------------===// -// Force NDEBUG on in any optimized build on Darwin. -// -// FIXME: This is a huge hack, to work around ridiculously awful compile times -// on this file with gcc-4.2 on Darwin, in Release mode. -#if (!defined(__llvm__) && defined(__APPLE__) && \ - defined(__OPTIMIZE__) && !defined(NDEBUG)) -#define NDEBUG -#endif - #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" @@ -177,14 +168,11 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } - /// InstructionSelect - This callback is invoked by - /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. - virtual void InstructionSelect(); - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); - virtual - bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; + virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; + + virtual void PreprocessISelDAG(); // Include the pieces autogenerated from the target description. #include "X86GenDAGISel.inc" @@ -208,18 +196,17 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp); - bool SelectScalarSSELoad(SDNode *Op, SDValue Pred, - SDValue N, SDValue &Base, SDValue &Scale, + bool SelectScalarSSELoad(SDNode *Root, SDValue N, + SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, SDValue &OutChain); + SDValue &NodeWithChain); + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); - void PreprocessForRMW(); - void PreprocessForFPConvert(); - + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
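Two related renames in this patch fit together: the FastISel branch opcodes now carry a _4 suffix (JNE_4, JA_4, and so on), and the new X86FixupKinds.h above defines reloc_pcrel_1byte / reloc_pcrel_4byte. In both cases the number is the displacement width in bytes; the 4-byte forms are the conservative choice since a rel32 displacement always reaches its target. The snippet below only illustrates the two encodings of JNE and the fixup-recording pattern the removed MCSingleInstructionCodeEmitter used, with hypothetical names.

#include "X86FixupKinds.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCFixup.h"
using namespace llvm;

// jne rel8 (2 bytes) vs. jne rel32 (6 bytes): the _1 / _4 opcode suffixes
// name the displacement size.
static const unsigned char JNE_1_Bytes[] = { 0x75, 0x00 };
static const unsigned char JNE_4_Bytes[] = { 0x0F, 0x85, 0x00, 0x00, 0x00, 0x00 };

// Hypothetical helper: record that the 4 bytes at Offset hold a pc-relative
// value to be resolved later (or turned into a relocation).
static void addPCRel32Fixup(SmallVectorImpl<MCFixup> &Fixups, unsigned Offset,
                            const MCExpr *Expr) {
  Fixups.push_back(MCFixup::Create(Offset, Expr,
                                   MCFixupKind(X86::reloc_pcrel_4byte)));
}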
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -295,19 +282,22 @@ namespace { const X86InstrInfo *getInstrInfo() { return getTargetMachine().getInstrInfo(); } - -#ifndef NDEBUG - unsigned Indent; -#endif }; } -bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, - SDNode *Root) const { +bool +X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { if (OptLevel == CodeGenOpt::None) return false; - if (U == Root) + if (!N.hasOneUse()) + return false; + + if (N.getOpcode() != ISD::LOAD) + return true; + + // If N is a load, do additional profitability checks. + if (U == Root) { switch (U->getOpcode()) { default: break; case X86ISD::ADD: @@ -354,60 +344,9 @@ bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, } } } - - // Proceed to 'generic' cycle finder code - return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); -} - -/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand -/// and move load below the TokenFactor. Replace store's chain operand with -/// load's chain result. -static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, - SDValue Store, SDValue TF) { - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) - if (Load.getNode() == TF.getOperand(i).getNode()) - Ops.push_back(Load.getOperand(0)); - else - Ops.push_back(TF.getOperand(i)); - SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); - SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF, - Load.getOperand(1), - Load.getOperand(2)); - CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1), - Store.getOperand(2), Store.getOperand(3)); -} - -/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The -/// chain produced by the load must only be used by the store's chain operand, -/// otherwise this may produce a cycle in the DAG. -/// -static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, - SDValue &Load) { - if (N.getOpcode() == ISD::BIT_CONVERT) { - if (!N.hasOneUse()) - return false; - N = N.getOperand(0); } - LoadSDNode *LD = dyn_cast<LoadSDNode>(N); - if (!LD || LD->isVolatile()) - return false; - if (LD->getAddressingMode() != ISD::UNINDEXED) - return false; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) - return false; - - if (N.hasOneUse() && - LD->hasNUsesOfValue(1, 1) && - N.getOperand(1) == Address && - LD->isOperandOf(Chain.getNode())) { - Load = N; - return true; - } - return false; + return true; } /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain @@ -473,51 +412,15 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { return false; } - -/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. -/// This is only run if not in -O0 mode. -/// This allows the instruction selector to pick more read-modify-write -/// instructions. This is a common case: -/// -/// [Load chain] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// / \- -/// / | -/// [TokenFactor] [Op] -/// ^ ^ -/// | | -/// \ / -/// \ / -/// [Store] -/// -/// The fact the store's chain operand != load's chain will prevent the -/// (store (op (load))) instruction from being selected. 
We can transform it to: -/// -/// [Load chain] -/// ^ -/// | -/// [TokenFactor] -/// ^ -/// | -/// [Load] -/// ^ ^ -/// | | -/// | \- -/// | | -/// | [Op] -/// | ^ -/// | | -/// \ / -/// \ / -/// [Store] -void X86DAGToDAGISel::PreprocessForRMW() { +void X86DAGToDAGISel::PreprocessISelDAG() { + // OptForSize is used in pattern predicates that isel is matching. + OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - if (I->getOpcode() == X86ISD::CALL) { + E = CurDAG->allnodes_end(); I != E; ) { + SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) { /// Also try moving call address load from outside callseq_start to just /// before the call to allow it to be folded. /// @@ -537,85 +440,23 @@ void X86DAGToDAGISel::PreprocessForRMW() { /// \ / /// \ / /// [CALL] - SDValue Chain = I->getOperand(0); - SDValue Load = I->getOperand(1); + SDValue Chain = N->getOperand(0); + SDValue Load = N->getOperand(1); if (!isCalleeLoad(Load, Chain)) continue; - MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); + MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain); ++NumLoadMoved; continue; } - - if (!ISD::isNON_TRUNCStore(I)) - continue; - SDValue Chain = I->getOperand(0); - - if (Chain.getNode()->getOpcode() != ISD::TokenFactor) - continue; - - SDValue N1 = I->getOperand(1); - SDValue N2 = I->getOperand(2); - if ((N1.getValueType().isFloatingPoint() && - !N1.getValueType().isVector()) || - !N1.hasOneUse()) - continue; - - bool RModW = false; - SDValue Load; - unsigned Opcode = N1.getNode()->getOpcode(); - switch (Opcode) { - case ISD::ADD: - case ISD::MUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: - case ISD::VECTOR_SHUFFLE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - RModW = isRMWLoad(N10, Chain, N2, Load); - if (!RModW) - RModW = isRMWLoad(N11, Chain, N2, Load); - break; - } - case ISD::SUB: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: - case ISD::ROTR: - case ISD::SUBC: - case ISD::SUBE: - case X86ISD::SHLD: - case X86ISD::SHRD: { - SDValue N10 = N1.getOperand(0); - RModW = isRMWLoad(N10, Chain, N2, Load); - break; - } - } - - if (RModW) { - MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); - ++NumLoadMoved; - checkForCycles(I); - } - } -} - - -/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend -/// nodes that target the FP stack to be store and load to the stack. This is a -/// gross hack. We would like to simply mark these as being illegal, but when -/// we do that, legalize produces these when it expands calls, then expands -/// these in the same legalize pass. We would like dag combine to be able to -/// hack on these between the call expansion and the node legalization. As such -/// this pass basically does "really late" legalization of these inline with the -/// X86 isel pass. -void X86DAGToDAGISel::PreprocessForFPConvert() { - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ) { - SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + + // Lower fpround and fpextend nodes that target the FP stack to be store and + // load to the stack. This is a gross hack. 
We would like to simply mark + // these as being illegal, but when we do that, legalize produces these when + // it expands calls, then expands these in the same legalize pass. We would + // like dag combine to be able to hack on these between the call expansion + // and the node legalization. As such this pass basically does "really + // late" legalization of these inline with the X86 isel pass. + // FIXME: This should only happen when not compiled with -O0. if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) continue; @@ -652,9 +493,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { // FIXME: optimize the case where the src/dest is a load or store? SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0), - MemTmp, NULL, 0, MemVT); + MemTmp, NULL, 0, MemVT, + false, false, 0); SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, - NULL, 0, MemVT); + NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the // extload we created. This will cause general havok on the dag because @@ -670,30 +512,6 @@ void X86DAGToDAGISel::PreprocessForFPConvert() { } } -/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel -/// when it has created a SelectionDAG for us to codegen. -void X86DAGToDAGISel::InstructionSelect() { - const Function *F = MF->getFunction(); - OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - - if (OptLevel != CodeGenOpt::None) - PreprocessForRMW(); - - // FIXME: This should only happen when not compiled with -O0. - PreprocessForFPConvert(); - - // Codegen the basic block. -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection begins:\n"); - Indent = 0; -#endif - SelectRoot(*CurDAG); -#ifndef NDEBUG - DEBUG(dbgs() << "===== Instruction selection ends:\n"); -#endif - - CurDAG->RemoveDeadNodes(); -} /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in /// the main function. @@ -1300,22 +1118,24 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to /// match a load whose top elements are either undef or zeros. The load flavor /// is derived from the type of N, which is either v4f32 or v2f64. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, +/// +/// We also return: +/// PatternChainNode: this is the matched node that has a chain input and +/// output. 
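The relocated comment above ("Lower fpround and fpextend nodes that target the FP stack to be store and load to the stack") comes down to a property of the x87 unit: values live in 80-bit registers, so the reliable way to round to float or double precision is to store the value to a memory slot of the destination size and load it back, which is exactly the truncating-store / extending-load pair PreprocessISelDAG now builds. A plain C++ analogue, purely as an illustration of the intent:

// Illustrative only: narrowing through memory, the way the x87 lowering does.
double roundThroughMemory(long double X) {
  volatile float Tmp = (float)X;  // truncating store to a 4-byte stack slot
  return Tmp;                     // extending load back into a wider value
}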
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, - SDValue &InChain, - SDValue &OutChain) { + SDValue &PatternNodeWithChain) { if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { - InChain = N.getOperand(0).getValue(1); - if (ISD::isNON_EXTLoad(InChain.getNode()) && - InChain.getValue(0).hasOneUse() && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op)) { - LoadSDNode *LD = cast<LoadSDNode>(InChain); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + PatternNodeWithChain = N.getOperand(0); + if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && + PatternNodeWithChain.hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; - OutChain = LD->getChain(); return true; } } @@ -1327,13 +1147,14 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred, N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && N.getOperand(0).getNode()->hasOneUse() && ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && - N.getOperand(0).getOperand(0).hasOneUse()) { + N.getOperand(0).getOperand(0).hasOneUse() && + IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && + IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); - if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; - OutChain = LD->getChain(); - InChain = SDValue(LD, 1); + PatternNodeWithChain = SDValue(LD, 0); return true; } return false; @@ -1407,7 +1228,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp) { - assert(Op->getOpcode() == X86ISD::TLSADDR); assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); @@ -1434,11 +1254,12 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - if (ISD::isNON_EXTLoad(N.getNode()) && - N.hasOneUse() && - IsLegalAndProfitableToFold(N.getNode(), P, P)) - return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); - return false; + if (!ISD::isNON_EXTLoad(N.getNode()) || + !IsProfitableToFold(N, P, P) || + !IsLegalToFold(N, P, P)) + return false; + + return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); } /// getGlobalBaseReg - Return an SDNode that returns the value of @@ -1541,7 +1362,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC16m; else if (isSub) { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB16mi8; else Opc = X86::LOCK_SUB16mi; @@ -1549,7 +1370,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB16mr; } else { if (isCN) { - if (Predicate_i16immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD16mi8; else Opc = X86::LOCK_ADD16mi; @@ -1564,7 +1385,7 @@ SDNode 
*X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_DEC32m; else if (isSub) { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB32mi8; else Opc = X86::LOCK_SUB32mi; @@ -1572,7 +1393,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { Opc = X86::LOCK_SUB32mr; } else { if (isCN) { - if (Predicate_i32immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD32mi8; else Opc = X86::LOCK_ADD32mi; @@ -1588,7 +1409,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { else if (isSub) { Opc = X86::LOCK_SUB64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_SUB64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_SUB64mi32; @@ -1596,7 +1417,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { } else { Opc = X86::LOCK_ADD64mr; if (isCN) { - if (Predicate_i64immSExt8(Val.getNode())) + if (Predicate_immSext8(Val.getNode())) Opc = X86::LOCK_ADD64mi8; else if (Predicate_i64immSExt32(Val.getNode())) Opc = X86::LOCK_ADD64mi32; @@ -1652,8 +1473,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: - case X86::JA: case X86::JAE: case X86::JB: case X86::JBE: - case X86::JE: case X86::JNE: case X86::JP: case X86::JNP: + case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: + case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: case X86::CMOVA16rr: case X86::CMOVA16rm: case X86::CMOVA32rr: case X86::CMOVA32rm: case X86::CMOVA64rr: case X86::CMOVA64rm: @@ -1693,24 +1514,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); DebugLoc dl = Node->getDebugLoc(); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent, ' ') << "Selecting: "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent += 2; -#endif + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); if (Node->isMachineOpcode()) { -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "== "; - Node->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); return NULL; // Already selected. } @@ -1806,13 +1613,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the high half of the result, if it is needed. 
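The SelectAtomicLoadAdd changes above fold the per-width predicates (Predicate_i16immSExt8 and friends) into a single Predicate_immSext8. The test itself is simply whether the constant survives sign-extension from 8 bits, in which case the shorter *mi8 LOCK forms can be selected; a hypothetical restatement:

// True if Imm can be encoded as a sign-extended 8-bit immediate, which is
// what allows the LOCK_ADD*mi8 / LOCK_SUB*mi8 forms to be chosen.
static bool fitsInSExt8(int64_t Imm) {
  return Imm >= -128 && Imm <= 127;
}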
if (!SDValue(Node, 1).use_empty()) { @@ -1835,19 +1636,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -1962,13 +1753,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { LoReg, NVT, InFlag); InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 0), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { @@ -1992,19 +1777,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { InFlag = Result.getValue(2); } ReplaceUses(SDValue(Node, 1), Result); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - Result.getNode()->dump(CurDAG); - dbgs() << '\n'; - }); -#endif + DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - -#ifndef NDEBUG - Indent -= 2; -#endif - return NULL; } @@ -2117,17 +1891,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = SelectCode(Node); -#ifndef NDEBUG - DEBUG({ - dbgs() << std::string(Indent-2, ' ') << "=> "; - if (ResNode == NULL || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << '\n'; - }); - Indent -= 2; -#endif + DEBUG(dbgs() << "=> "; + if (ResNode == NULL || ResNode == Node) + Node->dump(CurDAG); + else + ResNode->dump(CurDAG); + dbgs() << '\n'); return ResNode; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 515bc84..802bedc 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -73,15 +73,16 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { case X86Subtarget::isDarwin: if (TM.getSubtarget<X86Subtarget>().is64Bit()) return new X8664_MachoTargetObjectFile(); - return new X8632_MachoTargetObjectFile(); + return new TargetLoweringObjectFileMachO(); case X86Subtarget::isELF: - return new TargetLoweringObjectFileELF(); + if (TM.getSubtarget<X86Subtarget>().is64Bit()) + return new X8664_ELFTargetObjectFile(TM); + return new X8632_ELFTargetObjectFile(TM); case X86Subtarget::isMingw: case X86Subtarget::isCygwin: case X86Subtarget::isWindows: return new TargetLoweringObjectFileCOFF(); } - } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -1001,19 +1002,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); - // Divide and reminder operations have no vector equivalent and can - // trap. Do a custom widening for these operations in which we never - // generate more divides/remainder than the original vector width. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - if (!isTypeLegal((MVT::SimpleValueType)VT)) { - setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom); - setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom); - } - } - // FIXME: These should be based on subtarget info. 
Plus, the values should // be smaller when we are in optimizing for size mode. maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores @@ -1411,18 +1399,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { return CC_X86_32_C; } -/// NameDecorationForCallConv - Selects the appropriate decoration to -/// apply to a MachineFunction containing a given calling convention. -NameDecorationStyle -X86TargetLowering::NameDecorationForCallConv(CallingConv::ID CallConv) { - if (CallConv == CallingConv::X86_FastCall) - return FastCall; - else if (CallConv == CallingConv::X86_StdCall) - return StdCall; - return None; -} - - /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" with size and alignment information specified by /// the specific parameter attribute. The copy will be passed as a byval @@ -1476,7 +1452,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, VA.getLocMemOffset(), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); } } @@ -1498,9 +1475,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, Fn->getName() == "main") FuncInfo->setForceFramePointer(true); - // Decorate the function name. - FuncInfo->setDecorationStyle(NameDecorationForCallConv(CallConv)); - MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isTargetWin64(); @@ -1573,7 +1547,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect) - ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0); + ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, + false, false, 0); InVals.push_back(ArgValue); } @@ -1668,7 +1643,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, PseudoSourceValue::getFixedStack(RegSaveFrameIndex), - Offset); + Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -1737,7 +1712,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); } return DAG.getStore(Chain, dl, Arg, PtrOff, - PseudoSourceValue::getStack(), LocMemOffset); + PseudoSourceValue::getStack(), LocMemOffset, + false, false, 0); } /// EmitTailCallLoadRetAddr - Emit a load of return address if tail call @@ -1752,7 +1728,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. - OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0); + OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -1767,11 +1743,12 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); EVT VT = Is64Bit ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0); + PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0, + false, false, 0); return Chain; } @@ -1882,7 +1859,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - PseudoSourceValue::getFixedStack(FI), 0); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); Arg = SpillSlot; break; } @@ -2013,7 +1991,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Store relative to framepointer. MemOpChains2.push_back( DAG.getStore(ArgChain, dl, Arg, FIN, - PseudoSourceValue::getFixedStack(FI), 0)); + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0)); } } } @@ -2256,7 +2235,8 @@ static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, const X86InstrInfo *TII) { - int FI; + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) @@ -2272,25 +2252,30 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && Def->getOperand(1).isFI()) { FI = Def->getOperand(1).getIndex(); - if (MFI->getObjectSize(FI) != Flags.getByValSize()) - return false; + Bytes = Flags.getByValSize(); } else return false; } - } else { - LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); - if (!Ld) + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. + // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); if (!FINode) return false; FI = FINode->getIndex(); - } + } else + return false; + assert(FI != INT_MAX); if (!MFI->isFixedObjectIndex(FI)) return false; - return Offset == MFI->getObjectOffset(FI); + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible @@ -2397,7 +2382,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - true, false); + false, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -3592,7 +3577,8 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, int EltNo = (Offset - StartOffset) >> 2; int Mask[4] = { EltNo, EltNo, EltNo, EltNo }; EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32; - SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0); + SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0, + false, false, 0); // Canonicalize it to a v4i32 shuffle. 
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, @@ -4836,8 +4822,16 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) && isa<ConstantSDNode>(N2)) { - unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB - : X86ISD::PINSRW; + unsigned Opc; + if (VT == MVT::v8i16) + Opc = X86ISD::PINSRW; + else if (VT == MVT::v4i16) + Opc = X86ISD::MMX_PINSRW; + else if (VT == MVT::v16i8) + Opc = X86ISD::PINSRB; + else + Opc = X86ISD::PINSRB; + // Transform it so it match pinsr{b,w} which expects a GR32 as its second // argument. if (N1.getValueType() != MVT::i32) @@ -4888,7 +4882,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); if (N2.getValueType() != MVT::i32) N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue()); - return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2); + return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW, + dl, VT, N0, N1, N2); } return SDValue(); } @@ -5091,7 +5086,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -5171,7 +5166,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, MVT::i32)); SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base, - NULL, 0); + NULL, 0, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is @@ -5196,7 +5191,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - PseudoSourceValue::getGOT(), 0); + PseudoSourceValue::getGOT(), 0, false, false, 0); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. 
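
As a reading aid for the TLS hunk above, a minimal sketch of what LowerToTLSExecModel is computing, assuming the usual x86 TLS conventions; the variable name and the exact instruction sequence are illustrative only, not taken from this diff.

    // Source that reaches the exec-model lowering (hypothetical variable):
    extern __thread int tls_var;
    int read_tls() { return tls_var; }
    // The lowered address is  thread_pointer + offset :
    //   thread pointer : a load through the %fs (64-bit) or %gs (32-bit) segment
    //   offset         : a constant for local-exec, or a GOT load for initial-exec
    // i.e. roughly  %fs:0  plus  tls_var@TPOFF / tls_var@GOTTPOFF.
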
@@ -5264,7 +5259,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt, DAG.getConstant(VTBits, MVT::i8)); - SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT, + SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32, AndNode, DAG.getConstant(0, MVT::i8)); SDValue Hi, Lo; @@ -5313,7 +5308,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -5348,7 +5344,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, }; Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops)); Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); } return Result; @@ -5421,12 +5418,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, PseudoSourceValue::getConstantPool(), 0, - false, 16); + false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. @@ -5513,9 +5510,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, NULL, 0); + StackSlot, NULL, 0, false, false, 0); SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32), - OffsetSlot, NULL, 0); + OffsetSlot, NULL, 0, false, false, 0); return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG); } @@ -5563,7 +5560,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, dl, Value, StackSlot, - PseudoSourceValue::getFixedStack(SSFI), 0); + PseudoSourceValue::getFixedStack(SSFI), 0, + false, false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType()) @@ -5597,7 +5595,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { @@ -5607,7 +5605,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { // Load the result. 
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, NULL, 0); + FIST, StackSlot, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { @@ -5632,8 +5630,8 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -5659,8 +5657,8 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); if (VT.isVector()) { return DAG.getNode(ISD::BIT_CONVERT, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, @@ -5708,8 +5706,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. @@ -5737,8 +5735,8 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - PseudoSourceValue::getConstantPool(), 0, - false, 16); + PseudoSourceValue::getConstantPool(), 0, + false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); // Or the value with the sign bit. @@ -5890,26 +5888,31 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. 
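
Before the rewritten LowerToBT body below, a tiny example of the source pattern it targets; the function name is made up for illustration and the generated assembly is only approximate.

    #include <cstdint>
    // (and X, (shl 1, N)) compared against zero -- now recognized even through
    // truncates -- is the familiar C bit-test idiom:
    bool bit_is_set(uint64_t x, unsigned n) {
      return (x & (uint64_t(1) << n)) != 0;   // -> btq %rsi, %rdi ; setb %al (roughly)
    }
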
-static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, +static SDValue LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { + SDValue Op0 = And.getOperand(0); + SDValue Op1 = And.getOperand(1); + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + if (Op1.getOpcode() == ISD::TRUNCATE) + Op1 = Op1.getOperand(0); + SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); + if (Op1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0))) + if (And10C->getZExtValue() == 1) { + LHS = Op0; + RHS = Op1.getOperand(1); } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); + } else if (Op0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0))) + if (And00C->getZExtValue() == 1) { + LHS = Op1; + RHS = Op0.getOperand(1); } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); + } else if (Op1.getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1); + SDValue AndLHS = Op0; if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { LHS = AndLHS.getOperand(0); RHS = AndLHS.getOperand(1); @@ -5959,6 +5962,21 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { return NewSetCC; } + // Look for "(setcc) == / != 1" to avoid unncessary setcc. + if (Op0.getOpcode() == X86ISD::SETCC && + Op1.getOpcode() == ISD::Constant && + (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + cast<ConstantSDNode>(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); + bool Invert = (CC == ISD::SETNE) ^ + cast<ConstantSDNode>(Op1)->isNullValue(); + if (Invert) + CCode = X86::GetOppositeBranchCondition(CCode); + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); + } + bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) @@ -6400,24 +6418,13 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, EVT IntPtr = getPointerTy(); EVT SPTy = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); - Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); Flag = Chain.getValue(1); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - SDValue Ops[] = { Chain, - DAG.getTargetExternalSymbol("_alloca", IntPtr), - DAG.getRegister(X86::EAX, IntPtr), - DAG.getRegister(X86StackPtr, SPTy), - Flag }; - Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops, 5); - Flag = Chain.getValue(1); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(0, true), - DAG.getIntPtrConstant(0, true), - Flag); + Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag); + Flag = Chain.getValue(1); Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1); @@ -6461,8 +6468,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl, - DAG.GetOrdering(Chain.getNode())); + DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -6646,7 +6652,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, + false, false, 0); } // __va_list_tag: @@ -6658,8 +6665,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), - FIN, SV, 0); + DAG.getConstant(VarArgsGPOffset, MVT::i32), + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store fp_offset @@ -6667,21 +6674,23 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(VarArgsFPOffset, MVT::i32), - FIN, SV, 0); + FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0); + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], MemOps.size()); @@ -6967,13 +6976,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - NULL, 0); + NULL, 0, false, false, 0); } // Just load the return address. 
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, NULL, 0); + RetAddrFI, NULL, 0, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -6985,7 +6994,8 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, + false, false, 0); return FrameAddr; } @@ -7009,7 +7019,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(-TD->getPointerSize())); StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); - Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0); + Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); MF.getRegInfo().addLiveOut(StoreAddrReg); @@ -7044,11 +7054,12 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 SDValue Addr = Trmp; OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 0); + Addr, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(2, MVT::i64)); - OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, false, 2); + OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2, + false, false, 2); // Load the 'nest' parameter value into R10. // R10 is specified in X86CallingConv.td @@ -7056,24 +7067,25 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(10, MVT::i64)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 10); + Addr, TrmpAddr, 10, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(12, MVT::i64)); - OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, false, 2); + OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12, + false, false, 2); // Jump to the nested function. OpCode = (JMP64r << 8) | REX_WB; // jmpq *... 
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(20, MVT::i64)); OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16), - Addr, TrmpAddr, 20); + Addr, TrmpAddr, 20, false, false, 0); unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11 Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, DAG.getConstant(22, MVT::i64)); OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr, - TrmpAddr, 22); + TrmpAddr, 22, false, false, 0); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) }; @@ -7133,21 +7145,23 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg); OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(MOV32ri|N86Reg, MVT::i8), - Trmp, TrmpAddr, 0); + Trmp, TrmpAddr, 0, false, false, 0); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(1, MVT::i32)); - OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, false, 1); + OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1, + false, false, 1); const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(5, MVT::i32)); OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr, - TrmpAddr, 5, false, 1); + TrmpAddr, 5, false, false, 1); Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, DAG.getConstant(6, MVT::i32)); - OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, false, 1); + OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6, + false, false, 1); SDValue Ops[] = { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) }; @@ -7190,7 +7204,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DAG.getEntryNode(), StackSlot); // Load FP Control Word from stack slot - SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0); + SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0, + false, false, 0); // Transform as necessary SDValue CWD1 = @@ -7554,7 +7569,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (FIST.getNode() != 0) { EVT VT = N->getValueType(0); // Return a load from the stack slot. 
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0)); + Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0, + false, false, 0)); } return; } @@ -7572,14 +7588,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(edx.getValue(1)); return; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: { - EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements())); - return; - } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); @@ -7677,6 +7685,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; case X86ISD::PINSRB: return "X86ISD::PINSRB"; case X86ISD::PINSRW: return "X86ISD::PINSRW"; + case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; @@ -7721,6 +7730,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; + case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA"; } } @@ -7778,13 +7788,13 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const { - if (!Ty1->isInteger() || !Ty2->isInteger()) + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { @@ -7794,12 +7804,12 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { unsigned NumBits2 = VT2.getSizeInBits(); if (NumBits1 <= NumBits2) return false; - return Subtarget->is64Bit() || NumBits1 < 64; + return true; } bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const { // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. - return Ty1->isInteger(32) && Ty2->isInteger(64) && Subtarget->is64Bit(); + return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit(); } bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { @@ -7955,7 +7965,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, MIB.addReg(EAXreg); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8112,7 +8122,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, MIB.addReg(X86::EDX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. 
return nextMBB; @@ -8215,7 +8225,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, MIB.addReg(X86::EAX); // insert branch - BuildMI(newMBB, dl, TII->get(X86::JNE)).addMBB(newMBB); + BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. return nextMBB; @@ -8297,7 +8307,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( if (!Subtarget->isTargetWin64()) { // If %al is 0, branch around the XMM save block. BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg); - BuildMI(MBB, DL, TII->get(X86::JE)).addMBB(EndMBB); + BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB); MBB->addSuccessor(EndMBB); } @@ -8390,6 +8400,29 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, return BB; } +MachineBasicBlock * +X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *F = BB->getParent(); + + // The lowering is pretty easy: we're just emitting the call to _alloca. The + // non-trivial part is impdef of ESP. + // FIXME: The code should be tweaked as soon as we'll try to do codegen for + // mingw-w64. + + BuildMI(BB, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca") + .addReg(X86::EAX, RegState::Implicit) + .addReg(X86::ESP, RegState::Implicit) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::ESP, RegState::Define | RegState::Implicit); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; +} MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, @@ -8397,6 +8430,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); + case X86::MINGW_ALLOCA: + return EmitLoweredMingwAlloca(MI, BB, EM); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8783,10 +8818,11 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16) return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile()); + LD->isVolatile(), LD->isNonTemporal(), 0); return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), - LD->isVolatile(), LD->getAlignment()); + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; @@ -8806,10 +8842,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, SDValue RHS = N->getOperand(2); // If we have SSE[12] support, try to form min/max nodes. SSE min/max - // instructions have the peculiarity that if either operand is a NaN, - // they chose what we call the RHS operand (and as such are not symmetric). - // It happens that this matches the semantics of the common C idiom - // x<y?x:y and related forms, so we can recognize these cases. + // instructions match the semantics of the common C idiom x<y?x:y but not + // x<=y?x:y, because of how they handle negative zero (which can be + // ignored in unsafe-math mode). 
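
A small illustration of the corner cases the new min/max comments in the following hunks guard against, assuming standard SSE MINSD behavior (the result is the second operand whenever the ordered less-than compare is false); the helper functions are illustrative only.

    // x < y ? x : y matches MINSD(x, y) exactly: for x=+0.0, y=-0.0 both give
    // -0.0, and if either operand is NaN both give y.  x <= y ? x : y does not:
    // with x=+0.0, y=-0.0 the C expression yields +0.0 while MINSD yields -0.0,
    // hence the extra UnsafeFPMath / isKnownNeverZero checks before forming FMIN.
    double sel_lt(double x, double y) { return x <  y ? x : y; }  // safe as minsd
    double sel_le(double x, double y) { return x <= y ? x : y; }  // not a bare minsd
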
if (Subtarget->hasSSE2() && (LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) && Cond.getOpcode() == ISD::SETCC) { @@ -8817,36 +8852,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode = 0; // Check for x CC y ? x : y. - if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) { + if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && + DAG.isEqualTo(RHS, Cond.getOperand(1))) { switch (CC) { default: break; case ISD::SETULT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETOLE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) + break; Opcode = X86ISD::FMIN; break; case ISD::SETULE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -8855,32 +8888,29 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETOGE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(LHS)) + break; Opcode = X86ISD::FMAX; break; case ISD::SETUGT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle NaNs incorrectly, and swapping + // the operands would cause it to handle comparisons between positive + // and negative zero incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) { + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETUGE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. 
+ // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8889,36 +8919,33 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; } // Check for x CC y ? y : x -- a min/max with reversed arms. - } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) { + } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && + DAG.isEqualTo(RHS, Cond.getOperand(0))) { switch (CC) { default: break; case ISD::SETOGE: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a min would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMIN; break; case ISD::SETUGT: - // This can be a min if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a min would handle NaNs incorrectly. + if (!UnsafeFPMath && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMIN; break; case ISD::SETUGE: - // This can be a min, but if either operand is a NaN we need it to - // preserve the original LHS. + // Converting this to a min would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOGT: case ISD::SETGT: @@ -8927,32 +8954,28 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, break; case ISD::SETULT: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(LHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(RHS)) - break; - } + // Converting this to a max would handle NaNs incorrectly. + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) + break; Opcode = X86ISD::FMAX; break; case ISD::SETOLE: - // This can be a max if we can prove that at least one of the operands - // is not a nan. - if (!FiniteOnlyFPMath()) { - if (DAG.isKnownNeverNaN(RHS)) { - // Put the potential NaN in the RHS so that SSE will preserve it. - std::swap(LHS, RHS); - } else if (!DAG.isKnownNeverNaN(LHS)) + // Converting this to a max would handle comparisons between positive + // and negative zero incorrectly, and swapping the operands would + // cause it to handle NaNs incorrectly. + if (!UnsafeFPMath && + !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { + if (!FiniteOnlyFPMath() && + (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) break; + std::swap(LHS, RHS); } Opcode = X86ISD::FMAX; break; case ISD::SETULE: - // This can be a max, but if either operand is a NaN we need it to - // preserve the original LHS. 
+ // Converting this to a max would handle both negative zeros and NaNs + // incorrectly, but we can swap the operands to fix both. std::swap(LHS, RHS); case ISD::SETOLT: case ISD::SETLT: @@ -9177,10 +9200,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, /// LEA + SHL, LEA + LEA. static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { - if (DAG.getMachineFunction(). - getFunction()->hasFnAttr(Attribute::OptimizeForSize)) - return SDValue(); - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -9319,7 +9338,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); + unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); if (C->getZExtValue() == SplatIdx) BaseShAmt = InVec.getOperand(1); } @@ -9505,7 +9524,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getSrcValue(), Ld->getSrcValueOffset(), Ld->isVolatile(), - Ld->getAlignment()); + Ld->isNonTemporal(), Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); @@ -9514,7 +9533,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, } return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(), St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); } // Otherwise, lower to two pairs of 32-bit loads / stores. @@ -9524,10 +9544,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getSrcValue(), Ld->getSrcValueOffset(), - Ld->isVolatile(), Ld->getAlignment()); + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getSrcValue(), Ld->getSrcValueOffset()+4, - Ld->isVolatile(), + Ld->isVolatile(), Ld->isNonTemporal(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); @@ -9544,11 +9565,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr, St->getSrcValue(), St->getSrcValueOffset(), - St->isVolatile(), St->getAlignment()); + St->isVolatile(), St->isNonTemporal(), + St->getAlignment()); SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr, St->getSrcValue(), St->getSrcValueOffset() + 4, St->isVolatile(), + St->isNonTemporal(), MinAlign(St->getAlignment(), 4)); return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt); } @@ -9731,7 +9754,7 @@ static bool LowerToBSwap(CallInst *CI) { // Verify this is a simple bswap. 
if (CI->getNumOperands() != 2 || CI->getType() != CI->getOperand(1)->getType() || - !CI->getType()->isInteger()) + !CI->getType()->isIntegerTy()) return false; const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); @@ -9780,17 +9803,26 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { return LowerToBSwap(CI); } // rorw $$8, ${0:w} --> llvm.bswap.i16 - if (CI->getType()->isInteger(16) && + if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && - AsmPieces[0] == "rorw" && + (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") && AsmPieces[1] == "$$8," && AsmPieces[2] == "${0:w}" && - IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { - return LowerToBSwap(CI); + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { + AsmPieces.clear(); + SplitString(IA->getConstraintString().substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + return LowerToBSwap(CI); + } } break; case 3: - if (CI->getType()->isInteger(64) && + if (CI->getType()->isIntegerTy(64) && Constraints.size() >= 2 && Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 193ef05..4c12fcc 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -180,7 +180,7 @@ namespace llvm { /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector, /// corresponds to X86::PINSRW. - PINSRW, + PINSRW, MMX_PINSRW, /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, @@ -249,6 +249,9 @@ namespace llvm { // with control flow. VASTART_SAVE_XMM_REGS, + // MINGW_ALLOCA - MingW's __alloca call to do stack probing. + MINGW_ALLOCA, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. @@ -259,6 +262,10 @@ namespace llvm { ATOMAND64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG + + // WARNING: Do not add anything in the end unless you want the node to + // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be + // thought as target memory ops! }; } @@ -639,7 +646,6 @@ namespace llvm { int FPDiff, DebugLoc dl); CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - NameDecorationStyle NameDecorationForCallConv(CallingConv::ID CallConv); unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, @@ -790,7 +796,11 @@ namespace llvm { MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, MachineBasicBlock *BB, DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; - + + MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, + MachineBasicBlock *BB, + DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 468dd67..8462255 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -59,10 +59,11 @@ def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", // Pattern fragments. 
// -def i64immSExt8 : PatLeaf<(i64 imm), [{ - // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit - // sign extended field. - return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i64immSExt8 : PatLeaf<(i64 immSext8)>; + +def GetLo32XForm : SDNodeXForm<imm, [{ + // Transformation function: get the low 32 bits. + return getI32Imm((unsigned)N->getZExtValue()); }]>; def i64immSExt32 : PatLeaf<(i64 imm), [{ @@ -71,6 +72,7 @@ def i64immSExt32 : PatLeaf<(i64 imm), [{ return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); }]>; + def i64immZExt32 : PatLeaf<(i64 imm), [{ // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit // unsignedsign extended field. @@ -325,7 +327,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; @@ -556,7 +558,7 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2), addr:$dst)]>; def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", - [(store (adde (load addr:$dst), i64immSExt8:$src2), + [(store (adde (load addr:$dst), i64immSExt32:$src2), addr:$dst)]>; } // Uses = [EFLAGS] @@ -893,35 +895,38 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), let isTwoAddress = 1 in { def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src), "rcl{q}\t{1, $dst|$dst, 1}", []>; -def RCL64m1 : RI<0xD1, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{1, $dst|$dst, 1}", []>; -let Uses = [CL] in { -def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -def RCL64mCL : RI<0xD3, MRM2m, (outs i64mem:$dst), (ins i64mem:$src), - "rcl{q}\t{%cl, $dst|$dst, CL}", []>; -} def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), - "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{1, $dst|$dst, 1}", []>; -def RCR64m1 : RI<0xD1, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), + "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + let Uses = [CL] in { +def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src), "rcr{q}\t{%cl, $dst|$dst, CL}", []>; -def RCR64mCL : RI<0xD3, MRM3m, (outs i64mem:$dst), (ins i64mem:$src), - "rcr{q}\t{%cl, $dst|$dst, CL}", []>; } -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt), - "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs i64mem:$dst), - (ins i64mem:$src, i8imm:$cnt), +} + +let isTwoAddress = 0 in { +def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{1, $dst|$dst, 1}", []>; +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), + "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), + 
"rcr{q}\t{1, $dst|$dst, 1}", []>; +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>; + +let Uses = [CL] in { +def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst), + "rcl{q}\t{%cl, $dst|$dst, CL}", []>; +def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), + "rcr{q}\t{%cl, $dst|$dst, CL}", []>; +} } let isTwoAddress = 1 in { @@ -1771,7 +1776,7 @@ def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB; -def SWPGS : I<0x01, RawFrm, (outs), (ins), "swpgs", []>, TB; +def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB; def PUSHFS64 : I<0xa0, RawFrm, (outs), (ins), "push{q}\t%fs", []>, TB; @@ -1978,7 +1983,7 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm), (i64 0), (AND32ri (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit), - imm:$imm), + (i32 (GetLo32XForm imm:$imm))), x86_subreg_32bit)>; // r & (2^32-1) ==> movz @@ -2102,34 +2107,34 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; // (shl x (and y, 63)) ==> (shl x, y) -def : Pat<(shl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(shl GR64:$src1, (and CL, 63)), (SHL64rCL GR64:$src1)>; -def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHL64mCL addr:$dst)>; -def : Pat<(srl GR64:$src1, (and CL:$amt, 63)), +def : Pat<(srl GR64:$src1, (and CL, 63)), (SHR64rCL GR64:$src1)>; -def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SHR64mCL addr:$dst)>; -def : Pat<(sra GR64:$src1, (and CL:$amt, 63)), +def : Pat<(sra GR64:$src1, (and CL, 63)), (SAR64rCL GR64:$src1)>; -def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst), +def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; // Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm:$amt2)), addr:$dst), + GR64:$src2, (i8 imm)), addr:$dst), (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. 
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index e22a903..ae24bfb 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -397,7 +397,7 @@ def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins), let canFoldAsLoad = 1 in { def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP, [(set RFP32:$dst, (loadf32 addr:$src))]>; -let isReMaterializable = 1, mayHaveSideEffects = 1 in +let isReMaterializable = 1 in def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP, [(set RFP64:$dst, (loadf64 addr:$src))]>; def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a799f16..bb81cbf 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -29,7 +29,16 @@ def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; def MRM6m : Format<30>; def MRM7m : Format<31>; def MRMInitReg : Format<32>; - +def MRM_C1 : Format<33>; +def MRM_C2 : Format<34>; +def MRM_C3 : Format<35>; +def MRM_C4 : Format<36>; +def MRM_C8 : Format<37>; +def MRM_C9 : Format<38>; +def MRM_E8 : Format<39>; +def MRM_F0 : Format<40>; +def MRM_F8 : Format<41>; +def MRM_F9 : Format<42>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -37,11 +46,13 @@ def MRMInitReg : Format<32>; class ImmType<bits<3> val> { bits<3> Value = val; } -def NoImm : ImmType<0>; -def Imm8 : ImmType<1>; -def Imm16 : ImmType<2>; -def Imm32 : ImmType<3>; -def Imm64 : ImmType<4>; +def NoImm : ImmType<0>; +def Imm8 : ImmType<1>; +def Imm8PCRel : ImmType<2>; +def Imm16 : ImmType<3>; +def Imm32 : ImmType<4>; +def Imm32PCRel : ImmType<5>; +def Imm64 : ImmType<6>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. @@ -121,6 +132,12 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, let Pattern = pattern; let CodeSize = 3; } +class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm8PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> { @@ -134,6 +151,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + // FPStack Instruction Templates: // FPI - Floating Point Instruction template. 
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm> diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 8d13c0f..39bda04 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDQArr, X86::MOVDQAmr, 0, 16 }, { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 }, { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 }, - { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 }, - { X86::MOVSDrr, X86::MOVSDmr, 0, 0 }, { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 }, { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 }, - { X86::MOVSSrr, X86::MOVSSmr, 0, 0 }, { X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 }, { X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 }, { X86::MUL16r, X86::MUL16m, 1, 0 }, @@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, { X86::MOVDQArr, X86::MOVDQArm, 16 }, - { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 }, - { X86::MOVSDrr, X86::MOVSDrm, 0 }, { X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 }, { X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 }, - { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 }, - { X86::MOVSSrr, X86::MOVSSrm, 0 }, { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, @@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, case X86::MOV16rr: case X86::MOV32rr: case X86::MOV64rr: - case X86::MOVSSrr: - case X86::MOVSDrr: // FP Stack register class copies case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080: case X86::MOV_Fp3264: case X86::MOV_Fp3280: case X86::MOV_Fp6432: case X86::MOV_Fp8032: - + + // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64 + // copies are done with FsMOVAPSrr and FsMOVAPDrr. 
+ case X86::FsMOVAPSrr: case X86::FsMOVAPDrr: case X86::MOVAPSrr: case X86::MOVAPDrr: case X86::MOVDQArr: - case X86::MOVSS2PSrr: - case X86::MOVSD2PDrr: - case X86::MOVPS2SSrr: - case X86::MOVPD2SDrr: case X86::MMX_MOVQ64rr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && @@ -1083,7 +1073,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, case X86::MOV8r0: Opc = X86::MOV8ri; break; case X86::MOV16r0: Opc = X86::MOV16ri; break; case X86::MOV32r0: Opc = X86::MOV32ri; break; - case X86::MOV64r0: Opc = X86::MOV64ri; break; + case X86::MOV64r0: Opc = X86::MOV64ri64i32; break; } Clone = false; } @@ -1587,44 +1577,44 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { switch (BrOpc) { default: return X86::COND_INVALID; - case X86::JE: return X86::COND_E; - case X86::JNE: return X86::COND_NE; - case X86::JL: return X86::COND_L; - case X86::JLE: return X86::COND_LE; - case X86::JG: return X86::COND_G; - case X86::JGE: return X86::COND_GE; - case X86::JB: return X86::COND_B; - case X86::JBE: return X86::COND_BE; - case X86::JA: return X86::COND_A; - case X86::JAE: return X86::COND_AE; - case X86::JS: return X86::COND_S; - case X86::JNS: return X86::COND_NS; - case X86::JP: return X86::COND_P; - case X86::JNP: return X86::COND_NP; - case X86::JO: return X86::COND_O; - case X86::JNO: return X86::COND_NO; + case X86::JE_4: return X86::COND_E; + case X86::JNE_4: return X86::COND_NE; + case X86::JL_4: return X86::COND_L; + case X86::JLE_4: return X86::COND_LE; + case X86::JG_4: return X86::COND_G; + case X86::JGE_4: return X86::COND_GE; + case X86::JB_4: return X86::COND_B; + case X86::JBE_4: return X86::COND_BE; + case X86::JA_4: return X86::COND_A; + case X86::JAE_4: return X86::COND_AE; + case X86::JS_4: return X86::COND_S; + case X86::JNS_4: return X86::COND_NS; + case X86::JP_4: return X86::COND_P; + case X86::JNP_4: return X86::COND_NP; + case X86::JO_4: return X86::COND_O; + case X86::JNO_4: return X86::COND_NO; } } unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { switch (CC) { default: llvm_unreachable("Illegal condition code!"); - case X86::COND_E: return X86::JE; - case X86::COND_NE: return X86::JNE; - case X86::COND_L: return X86::JL; - case X86::COND_LE: return X86::JLE; - case X86::COND_G: return X86::JG; - case X86::COND_GE: return X86::JGE; - case X86::COND_B: return X86::JB; - case X86::COND_BE: return X86::JBE; - case X86::COND_A: return X86::JA; - case X86::COND_AE: return X86::JAE; - case X86::COND_S: return X86::JS; - case X86::COND_NS: return X86::JNS; - case X86::COND_P: return X86::JP; - case X86::COND_NP: return X86::JNP; - case X86::COND_O: return X86::JO; - case X86::COND_NO: return X86::JNO; + case X86::COND_E: return X86::JE_4; + case X86::COND_NE: return X86::JNE_4; + case X86::COND_L: return X86::JL_4; + case X86::COND_LE: return X86::JLE_4; + case X86::COND_G: return X86::JG_4; + case X86::COND_GE: return X86::JGE_4; + case X86::COND_B: return X86::JB_4; + case X86::COND_BE: return X86::JBE_4; + case X86::COND_A: return X86::JA_4; + case X86::COND_AE: return X86::JAE_4; + case X86::COND_S: return X86::JS_4; + case X86::COND_NS: return X86::JNS_4; + case X86::COND_P: return X86::JP_4; + case X86::COND_NP: return X86::JNP_4; + case X86::COND_O: return X86::JO_4; + case X86::COND_NO: return X86::JNO_4; } } @@ -1694,7 +1684,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; // Handle unconditional branches. 
- if (I->getOpcode() == X86::JMP) { + if (I->getOpcode() == X86::JMP_4) { if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1778,7 +1768,7 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { while (I != MBB.begin()) { --I; - if (I->getOpcode() != X86::JMP && + if (I->getOpcode() != X86::JMP_4 && GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) break; // Remove the branch. @@ -1804,7 +1794,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1814,16 +1804,16 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. - BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); + BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { @@ -1834,7 +1824,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); + BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1860,7 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, CommonRC = SrcRC; else if (!DestRC->hasSubClass(SrcRC)) { // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, - // but we want to copy then as GR64. Similarly, for GR32_NOREX and + // but we want to copy them as GR64. Similarly, for GR32_NOREX and // GR32_NOSP, copy as GR32. if (SrcRC->hasSuperClass(&X86::GR64RegClass) && DestRC->hasSuperClass(&X86::GR64RegClass)) @@ -3556,6 +3546,14 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } } break; + + case X86II::MRM_C1: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + FinalSize += 2; + break; } case X86II::MRMInitReg: diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index a6b3863..5111719 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -268,6 +268,18 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, + + //// MRM_C1 - A mod/rm byte of exactly 0xC1. + MRM_C1 = 33, + MRM_C2 = 34, + MRM_C3 = 35, + MRM_C4 = 36, + MRM_C8 = 37, + MRM_C9 = 38, + MRM_E8 = 39, + MRM_F0 = 40, + MRM_F8 = 41, + MRM_F9 = 42, FormMask = 63, @@ -331,11 +343,13 @@ namespace X86II { // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. 
ImmShift = 13, - ImmMask = 7 << ImmShift, - Imm8 = 1 << ImmShift, - Imm16 = 2 << ImmShift, - Imm32 = 3 << ImmShift, - Imm64 = 4 << ImmShift, + ImmMask = 7 << ImmShift, + Imm8 = 1 << ImmShift, + Imm8PCRel = 2 << ImmShift, + Imm16 = 3 << ImmShift, + Imm32 = 4 << ImmShift, + Imm32PCRel = 5 << ImmShift, + Imm64 = 6 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -396,15 +410,37 @@ namespace X86II { return TSFlags >> X86II::OpcodeShift; } + static inline bool hasImm(unsigned TSFlags) { + return (TSFlags & X86II::ImmMask) != 0; + } + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. static inline unsigned getSizeOfImm(unsigned TSFlags) { switch (TSFlags & X86II::ImmMask) { default: assert(0 && "Unknown immediate size"); - case X86II::Imm8: return 1; - case X86II::Imm16: return 2; - case X86II::Imm32: return 4; - case X86II::Imm64: return 8; + case X86II::Imm8: + case X86II::Imm8PCRel: return 1; + case X86II::Imm16: return 2; + case X86II::Imm32: + case X86II::Imm32PCRel: return 4; + case X86II::Imm64: return 8; + } + } + + /// isImmPCRel - Return true if the immediate of the specified instruction's + /// TSFlags indicates that it is pc relative. + static inline unsigned isImmPCRel(unsigned TSFlags) { + switch (TSFlags & X86II::ImmMask) { + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; } } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f0b4239..8a6ff54 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -65,7 +65,7 @@ def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; -def SDTX86RdTsc : SDTypeProfile<0, 0, []>; +def SDTX86Void : SDTypeProfile<0, 0, []>; def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -143,7 +143,7 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore, SDNPMayLoad]>; -def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc, +def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutFlag, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; @@ -178,6 +178,9 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; +def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -343,18 +346,37 @@ def X86_COND_O : PatLeaf<(i8 13)>; def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE def X86_COND_S : PatLeaf<(i8 15)>; -def i16immSExt8 : PatLeaf<(i16 imm), [{ - // i16immSExt8 predicate - True if the 16-bit immediate fits in a 8-bit - // sign extended field. - return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def immSext8 : PatLeaf<(imm), [{ + return N->getSExtValue() == (int8_t)N->getSExtValue(); }]>; -def i32immSExt8 : PatLeaf<(i32 imm), [{ - // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit - // sign extended field. 
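The immediate-size field rewritten above now also records whether the immediate is PC-relative (Imm8PCRel/Imm32PCRel), and the new hasImm/getSizeOfImm/isImmPCRel helpers decode it. A self-contained C++ sketch of the same decoding, using the shift, mask, and values from the hunk; only the main() driver is illustrative.

    #include <cassert>
    #include <cstdio>

    // Immediate-size encoding from the X86II hunk above: a three-bit field at
    // bit 13 of TSFlags; zero means "no immediate".
    enum {
      ImmShift   = 13,
      ImmMask    = 7 << ImmShift,
      Imm8       = 1 << ImmShift,
      Imm8PCRel  = 2 << ImmShift,
      Imm16      = 3 << ImmShift,
      Imm32      = 4 << ImmShift,
      Imm32PCRel = 5 << ImmShift,
      Imm64      = 6 << ImmShift
    };

    static bool hasImm(unsigned TSFlags) { return (TSFlags & ImmMask) != 0; }

    static unsigned getSizeOfImm(unsigned TSFlags) {
      switch (TSFlags & ImmMask) {
      case Imm8: case Imm8PCRel:   return 1;
      case Imm16:                  return 2;
      case Imm32: case Imm32PCRel: return 4;
      case Imm64:                  return 8;
      default: assert(false && "Unknown immediate size"); return 0;
      }
    }

    static bool isImmPCRel(unsigned TSFlags) {
      return (TSFlags & ImmMask) == Imm8PCRel || (TSFlags & ImmMask) == Imm32PCRel;
    }

    int main() {
      unsigned Flags = Imm32PCRel;   // e.g. a rel32 branch such as JMP_4
      std::printf("%d %u %d\n", hasImm(Flags), getSizeOfImm(Flags), isImmPCRel(Flags));
      return 0;
    }
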
- return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue(); +def i16immSExt8 : PatLeaf<(i16 immSext8)>; +def i32immSExt8 : PatLeaf<(i32 immSext8)>; + +/// Load patterns: these constraint the match to the right address space. +def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + if (PT->getAddressSpace() > 255) + return false; + return true; }]>; +def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 256; + return false; +}]>; + +def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) + if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) + return PT->getAddressSpace() == 257; + return false; +}]>; + + // Helper fragments for loads. // It's always safe to treat a anyext i16 load as a i32 load if the i16 is // known to be 32-bit aligned or better. Ditto for i8 to i16. @@ -372,8 +394,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{ return false; }]>; -def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), -[{ +def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{ LoadSDNode *LD = cast<LoadSDNode>(N); if (const Value *Src = LD->getSrcValue()) if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) @@ -399,72 +420,11 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ return false; }]>; -def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{ - LoadSDNode *LD = cast<LoadSDNode>(N); - if (const Value *Src = LD->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - if (LD->isVolatile()) - return false; - ISD::LoadExtType ExtType = LD->getExtensionType(); - if (ExtType == ISD::NON_EXTLOAD) - return true; - if (ExtType == ISD::EXTLOAD) - return LD->getAlignment() >= 4; - return false; -}]>; - -def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - return PT->getAddressSpace() == 256; - return false; -}]>; - -def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - return PT->getAddressSpace() == 257; - return false; -}]>; - -def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; - -def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadf64 : 
PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; -def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{ - if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue()) - if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) - if (PT->getAddressSpace() > 255) - return false; - return true; -}]>; +def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>; +def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>; +def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>; +def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>; +def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>; def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>; def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; @@ -562,7 +522,7 @@ def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2), } // x86-64 va_start lowering magic. -let usesCustomInserter = 1 in +let usesCustomInserter = 1 in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), (ins GR8:$al, @@ -573,6 +533,19 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo, imm:$regsavefi, imm:$offset)]>; +// Dynamic stack allocation yields _alloca call for Cygwin/Mingw targets. Calls +// to _alloca is needed to probe the stack when allocating more than 4k bytes in +// one go. Touching the stack at 4K increments is necessary to ensure that the +// guard pages used by the OS virtual memory manager are allocated in correct +// sequence. +// The main point of having separate instruction are extra unmodelled effects +// (compared to ordinary calls) like stack pointer change. + +def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins), + "# dynamic stack allocation", + [(X86MingwAlloca)]>; +} + // Nop let neverHasSideEffects = 1 in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>; @@ -596,7 +569,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in "", []>; //===----------------------------------------------------------------------===// -// Control Flow Instructions... +// Control Flow Instructions. // // Return instructions. @@ -614,16 +587,46 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, "lret\t$amt", []>; } -// All branches are RawFrm, Void, Branch, and Terminators -let isBranch = 1, isTerminator = 1 in - class IBr<bits<8> opcode, dag ins, string asm, list<dag> pattern> : - I<opcode, RawFrm, (outs), ins, asm, pattern>; +// Unconditional branches. +let isBarrier = 1, isBranch = 1, isTerminator = 1 in { + def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), + "jmp\t$dst", [(br bb:$dst)]>; + def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), + "jmp\t$dst", []>; +} -let isBranch = 1, isBarrier = 1 in { - def JMP : IBr<0xE9, (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - def JMP8 : IBr<0xEB, (ins brtarget8:$dst), "jmp\t$dst", []>; +// Conditional Branches. 
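The MINGW_ALLOCA comment above explains why large dynamic allocations on Cygwin/MinGW are routed through _alloca: the stack must be touched one page at a time so each OS guard page is hit in order and the stack can be grown. A rough, standalone C++ illustration of that probing idea (not the actual _alloca implementation; the 4096-byte page size and function name are assumptions for the sketch).

    #include <cstddef>
    #include <cstdint>

    // Illustrative only: touch one byte per page, top-down, before using a
    // large region below 'base', so guard pages are committed in sequence.
    // Real probing is done by the runtime's _alloca/_chkstk helper.
    static void probe_stack(volatile char *base, std::size_t bytes,
                            std::size_t page = 4096) {
      for (std::size_t off = page; off < bytes; off += page)
        base[-static_cast<std::ptrdiff_t>(off)] = 0;   // one touch per page
      base[-static_cast<std::ptrdiff_t>(bytes)] = 0;   // and the final page
    }

    int main() {
      char buffer[3 * 4096];
      // Pretend 'buffer + sizeof(buffer)' is the current stack pointer and we
      // are about to allocate the whole region below it.
      probe_stack(buffer + sizeof(buffer), sizeof(buffer));
      return 0;
    }
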
+let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { + multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { + def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>; + def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm, + [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB; + } } +defm JO : ICBr<0x70, 0x80, "jo\t$dst" , X86_COND_O>; +defm JNO : ICBr<0x71, 0x81, "jno\t$dst" , X86_COND_NO>; +defm JB : ICBr<0x72, 0x82, "jb\t$dst" , X86_COND_B>; +defm JAE : ICBr<0x73, 0x83, "jae\t$dst", X86_COND_AE>; +defm JE : ICBr<0x74, 0x84, "je\t$dst" , X86_COND_E>; +defm JNE : ICBr<0x75, 0x85, "jne\t$dst", X86_COND_NE>; +defm JBE : ICBr<0x76, 0x86, "jbe\t$dst", X86_COND_BE>; +defm JA : ICBr<0x77, 0x87, "ja\t$dst" , X86_COND_A>; +defm JS : ICBr<0x78, 0x88, "js\t$dst" , X86_COND_S>; +defm JNS : ICBr<0x79, 0x89, "jns\t$dst", X86_COND_NS>; +defm JP : ICBr<0x7A, 0x8A, "jp\t$dst" , X86_COND_P>; +defm JNP : ICBr<0x7B, 0x8B, "jnp\t$dst", X86_COND_NP>; +defm JL : ICBr<0x7C, 0x8C, "jl\t$dst" , X86_COND_L>; +defm JGE : ICBr<0x7D, 0x8D, "jge\t$dst", X86_COND_GE>; +defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; +defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; + +// FIXME: What about the CX/RCX versions of this instruction? +let Uses = [ECX], isBranch = 1, isTerminator = 1 in + def JCXZ8 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), + "jcxz\t$dst", []>; + + // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", @@ -644,63 +647,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { "ljmp{l}\t{*}$dst", []>; } -// Conditional branches -let Uses = [EFLAGS] in { -// Short conditional jumps -def JO8 : IBr<0x70, (ins brtarget8:$dst), "jo\t$dst", []>; -def JNO8 : IBr<0x71, (ins brtarget8:$dst), "jno\t$dst", []>; -def JB8 : IBr<0x72, (ins brtarget8:$dst), "jb\t$dst", []>; -def JAE8 : IBr<0x73, (ins brtarget8:$dst), "jae\t$dst", []>; -def JE8 : IBr<0x74, (ins brtarget8:$dst), "je\t$dst", []>; -def JNE8 : IBr<0x75, (ins brtarget8:$dst), "jne\t$dst", []>; -def JBE8 : IBr<0x76, (ins brtarget8:$dst), "jbe\t$dst", []>; -def JA8 : IBr<0x77, (ins brtarget8:$dst), "ja\t$dst", []>; -def JS8 : IBr<0x78, (ins brtarget8:$dst), "js\t$dst", []>; -def JNS8 : IBr<0x79, (ins brtarget8:$dst), "jns\t$dst", []>; -def JP8 : IBr<0x7A, (ins brtarget8:$dst), "jp\t$dst", []>; -def JNP8 : IBr<0x7B, (ins brtarget8:$dst), "jnp\t$dst", []>; -def JL8 : IBr<0x7C, (ins brtarget8:$dst), "jl\t$dst", []>; -def JGE8 : IBr<0x7D, (ins brtarget8:$dst), "jge\t$dst", []>; -def JLE8 : IBr<0x7E, (ins brtarget8:$dst), "jle\t$dst", []>; -def JG8 : IBr<0x7F, (ins brtarget8:$dst), "jg\t$dst", []>; - -def JCXZ8 : IBr<0xE3, (ins brtarget8:$dst), "jcxz\t$dst", []>; - -def JE : IBr<0x84, (ins brtarget:$dst), "je\t$dst", - [(X86brcond bb:$dst, X86_COND_E, EFLAGS)]>, TB; -def JNE : IBr<0x85, (ins brtarget:$dst), "jne\t$dst", - [(X86brcond bb:$dst, X86_COND_NE, EFLAGS)]>, TB; -def JL : IBr<0x8C, (ins brtarget:$dst), "jl\t$dst", - [(X86brcond bb:$dst, X86_COND_L, EFLAGS)]>, TB; -def JLE : IBr<0x8E, (ins brtarget:$dst), "jle\t$dst", - [(X86brcond bb:$dst, X86_COND_LE, EFLAGS)]>, TB; -def JG : IBr<0x8F, (ins brtarget:$dst), "jg\t$dst", - [(X86brcond bb:$dst, X86_COND_G, EFLAGS)]>, TB; -def JGE : IBr<0x8D, (ins brtarget:$dst), "jge\t$dst", - [(X86brcond bb:$dst, X86_COND_GE, EFLAGS)]>, TB; - -def JB : IBr<0x82, (ins brtarget:$dst), "jb\t$dst", - [(X86brcond bb:$dst, X86_COND_B, 
EFLAGS)]>, TB; -def JBE : IBr<0x86, (ins brtarget:$dst), "jbe\t$dst", - [(X86brcond bb:$dst, X86_COND_BE, EFLAGS)]>, TB; -def JA : IBr<0x87, (ins brtarget:$dst), "ja\t$dst", - [(X86brcond bb:$dst, X86_COND_A, EFLAGS)]>, TB; -def JAE : IBr<0x83, (ins brtarget:$dst), "jae\t$dst", - [(X86brcond bb:$dst, X86_COND_AE, EFLAGS)]>, TB; - -def JS : IBr<0x88, (ins brtarget:$dst), "js\t$dst", - [(X86brcond bb:$dst, X86_COND_S, EFLAGS)]>, TB; -def JNS : IBr<0x89, (ins brtarget:$dst), "jns\t$dst", - [(X86brcond bb:$dst, X86_COND_NS, EFLAGS)]>, TB; -def JP : IBr<0x8A, (ins brtarget:$dst), "jp\t$dst", - [(X86brcond bb:$dst, X86_COND_P, EFLAGS)]>, TB; -def JNP : IBr<0x8B, (ins brtarget:$dst), "jnp\t$dst", - [(X86brcond bb:$dst, X86_COND_NP, EFLAGS)]>, TB; -def JO : IBr<0x80, (ins brtarget:$dst), "jo\t$dst", - [(X86brcond bb:$dst, X86_COND_O, EFLAGS)]>, TB; -def JNO : IBr<0x81, (ins brtarget:$dst), "jno\t$dst", - [(X86brcond bb:$dst, X86_COND_NO, EFLAGS)]>, TB; -} // Uses = [EFLAGS] // Loop instructions @@ -721,7 +667,7 @@ let isCall = 1 in XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in { - def CALLpcrel32 : Ii32<0xE8, RawFrm, + def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst,variable_ops), "call\t$dst", []>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), @@ -761,8 +707,10 @@ def TCRETURNri : I<0, Pseudo, (outs), "#TC_RETURN $dst $offset", []>; -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst, variable_ops), +// FIXME: The should be pseudo instructions that are lowered when going to +// mcinst. +let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in + def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops), "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in @@ -929,6 +877,9 @@ let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>, TB; +let Defs = [RAX, RCX, RDX] in +def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + let isBarrier = 1, hasCtrlDep = 1 in { def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB; } @@ -1059,7 +1010,7 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1 in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))]>; @@ -1093,7 +1044,7 @@ def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; let mayLoad = 1, - canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in + canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>; @@ -1115,7 +1066,10 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), // // Extra precision multiplication -let Defs = [AL,AH,EFLAGS], Uses = [AL] in + +// AL is really implied by AX, by the registers in Defs must match the +// SDNode results (i8, i32). 
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the @@ -1133,7 +1087,7 @@ def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", []>; // EAX,EDX = EAX*GR32 -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. @@ -1155,7 +1109,7 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), } let neverHasSideEffects = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1165,7 +1119,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; // EAX,EDX = EAX*GR32 let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), "imul{b}\t$src", []>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1178,7 +1132,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), } // neverHasSideEffects // unsigned division/remainder -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1188,7 +1142,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "div{l}\t$src", []>; let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1201,7 +1155,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), } // Signed division/remainder. 
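The change from Defs = [AL,AH,EFLAGS] to Defs = [AL,EFLAGS,AX] in these hunks reflects that the 8-bit multiply and divide forms really produce their result in AX as a whole (AH:AL), even though the selected SDNode results are i8/i32. A tiny C++ model of what mul{b} computes, for reference; the function name is made up.

    #include <cstdint>
    #include <cstdio>

    // mul{b} src: AX = AL * src (unsigned). AH receives the high half and AL
    // the low half, which is why the instruction's Defs cover AX, not just AL.
    static uint16_t mul8(uint8_t al, uint8_t src) {
      return static_cast<uint16_t>(al) * static_cast<uint16_t>(src);
    }

    int main() {
      uint16_t ax = mul8(200, 3);   // 600 = 0x0258
      std::printf("AH=%u AL=%u\n", unsigned(ax >> 8), unsigned(ax & 0xFF));
      return 0;
    }
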
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1211,7 +1165,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "idiv{l}\t$src", []>; let mayLoad = 1, mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -2328,98 +2282,100 @@ let isTwoAddress = 0 in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{1, $dst|$dst, 1}", []>; -def RCL8m1 : I<0xD0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; -def RCL8mCL : I<0xD2, MRM2m, (outs i8mem:$dst), (ins i8mem:$src), - "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def RCL16m1 : I<0xD1, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCL16mCL : I<0xD3, MRM2m, (outs i16mem:$dst), (ins i16mem:$src), - "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL16mi : Ii8<0xC1, MRM2m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{1, $dst|$dst, 1}", []>; -def RCL32m1 : I<0xD1, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; -def RCL32mCL : I<0xD3, MRM2m, (outs i32mem:$dst), (ins i32mem:$src), - "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL32mi : Ii8<0xC1, MRM2m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), - "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{1, $dst|$dst, 1}", []>; -def RCR8m1 : I<0xD0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; -def RCR8mCL : I<0xD2, MRM3m, (outs i8mem:$dst), (ins i8mem:$src), - "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs i8mem:$dst), (ins i8mem:$src, i8imm:$cnt), - "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; -def 
RCR16m1 : I<0xD1, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; -def RCR16mCL : I<0xD3, MRM3m, (outs i16mem:$dst), (ins i16mem:$src), - "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR16mi : Ii8<0xC1, MRM3m, (outs i16mem:$dst), - (ins i16mem:$src, i8imm:$cnt), - "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{1, $dst|$dst, 1}", []>; -def RCR32m1 : I<0xD1, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; -def RCR32mCL : I<0xD3, MRM3m, (outs i32mem:$dst), (ins i32mem:$src), - "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR32mi : Ii8<0xC1, MRM3m, (outs i32mem:$dst), - (ins i32mem:$src, i8imm:$cnt), + +let isTwoAddress = 0 in { +def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{1, $dst|$dst, 1}", []>; +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{1, $dst|$dst, 1}", []>; +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), + "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{1, $dst|$dst, 1}", []>; +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), + "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; +def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), + "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; +def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{1, $dst|$dst, 1}", []>; +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>; +let Uses = [CL] in { +def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst), + "rcl{b}\t{%cl, $dst|$dst, CL}", []>; +def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst), + "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst), + "rcl{l}\t{%cl, $dst|$dst, CL}", []>; +def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst), + "rcr{b}\t{%cl, $dst|$dst, CL}", []>; +def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), + "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; +def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), + "rcr{l}\t{%cl, $dst|$dst, CL}", []>; +} +} + // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), @@ -4100,7 +4056,7 @@ def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "lsl{l}\t{$src, $dst|$dst, $src}", []>, TB; -def INVLPG : I<0x01, RawFrm, (outs), 
(ins), "invlpg", []>, TB; +def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), "str{w}\t{$dst}", []>, TB; @@ -4262,17 +4218,17 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; // VMX instructions // 66 0F 38 80 -def INVEPT : I<0x38, RawFrm, (outs), (ins), "invept", []>, OpSize, TB; +def INVEPT : I<0x80, RawFrm, (outs), (ins), "invept", []>, OpSize, T8; // 66 0F 38 81 -def INVVPID : I<0x38, RawFrm, (outs), (ins), "invvpid", []>, OpSize, TB; +def INVVPID : I<0x81, RawFrm, (outs), (ins), "invvpid", []>, OpSize, T8; // 0F 01 C1 -def VMCALL : I<0x01, RawFrm, (outs), (ins), "vmcall", []>, TB; +def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB; def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmclear\t$vmcs", []>, OpSize, TB; // 0F 01 C2 -def VMLAUNCH : I<0x01, RawFrm, (outs), (ins), "vmlaunch", []>, TB; +def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB; // 0F 01 C3 -def VMRESUME : I<0x01, RawFrm, (outs), (ins), "vmresume", []>, TB; +def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, TB; def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins), @@ -4294,7 +4250,7 @@ def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB; // 0F 01 C4 -def VMXOFF : I<0x01, RawFrm, (outs), (ins), "vmxoff", []>, OpSize; +def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB; def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon), "vmxon\t{$vmxon}", []>, XD; @@ -4462,12 +4418,6 @@ def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; -// (and (i32 load), 255) -> (zextload i8) -def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))), - (MOVZX32rm8 addr:$src)>; -def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))), - (MOVZX32rm16 addr:$src)>; - //===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// @@ -4563,43 +4513,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; // (shl x (and y, 31)) ==> (shl x, y) -def : Pat<(shl GR8:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR8:$src1, (and CL, 31)), (SHL8rCL GR8:$src1)>; -def : Pat<(shl GR16:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR16:$src1, (and CL, 31)), (SHL16rCL GR16:$src1)>; -def : Pat<(shl GR32:$src1, (and CL:$amt, 31)), +def : Pat<(shl GR32:$src1, (and CL, 31)), (SHL32rCL GR32:$src1)>; -def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SHL8mCL addr:$dst)>; -def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SHL16mCL addr:$dst)>; -def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SHL32mCL addr:$dst)>; -def : Pat<(srl GR8:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR8:$src1, (and CL, 31)), (SHR8rCL GR8:$src1)>; -def : Pat<(srl GR16:$src1, (and CL:$amt, 31)), +def : 
Pat<(srl GR16:$src1, (and CL, 31)), (SHR16rCL GR16:$src1)>; -def : Pat<(srl GR32:$src1, (and CL:$amt, 31)), +def : Pat<(srl GR32:$src1, (and CL, 31)), (SHR32rCL GR32:$src1)>; -def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SHR8mCL addr:$dst)>; -def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SHR16mCL addr:$dst)>; -def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SHR32mCL addr:$dst)>; -def : Pat<(sra GR8:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR8:$src1, (and CL, 31)), (SAR8rCL GR8:$src1)>; -def : Pat<(sra GR16:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR16:$src1, (and CL, 31)), (SAR16rCL GR16:$src1)>; -def : Pat<(sra GR32:$src1, (and CL:$amt, 31)), +def : Pat<(sra GR32:$src1, (and CL, 31)), (SAR32rCL GR32:$src1)>; -def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst), (SAR8mCL addr:$dst)>; -def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), (SAR16mCL addr:$dst)>; -def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst), +def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; // (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) @@ -4620,11 +4570,11 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), addr:$dst), (SHRD32mrCL addr:$dst, GR32:$src2)>; -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm:$amt2)), addr:$dst), + GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; // (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) @@ -4645,11 +4595,11 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), addr:$dst), (SHLD32mrCL addr:$dst, GR32:$src2)>; -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm:$amt2)), addr:$dst), + GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; // (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) @@ -4670,11 +4620,11 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), addr:$dst), (SHRD16mrCL addr:$dst, GR16:$src2)>; -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), +def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm:$amt2)), addr:$dst), + GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; // (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) @@ -4695,11 +4645,11 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), addr:$dst), (SHLD16mrCL addr:$dst, GR16:$src2)>; 
-def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)), +def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm:$amt2)), addr:$dst), + GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; // (anyext (setcc_carry)) -> (setcc_carry) diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 89f020c..c8e0723 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -141,7 +141,7 @@ def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))]>; @@ -426,13 +426,15 @@ def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), // Extract / Insert -def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>; -def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>; +def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW", + SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; + def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1), + [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1), (iPTR imm:$src2)))]>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg, @@ -597,13 +599,6 @@ let AddedComplexity = 10 in { (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>; } -// Patterns to perform vector shuffling with a zeroed out vector. -let AddedComplexity = 20 in { - def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV, - (v2i32 (scalar_to_vector (load_mmx addr:$src))))), - (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>; -} - // Some special case PANDN patterns. // FIXME: Get rid of these. 
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e26c979..2743dba 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -160,6 +160,32 @@ def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; +// MOVNT Support +// Like 'store', but requires the non-temporal bit to be set +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal(); + return false; +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED && + ST->getAlignment() >= 16; + return false; +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && + ST->getAlignment() < 16; + return false; +}]>; + def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; @@ -344,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { // SSE1 Instructions //===----------------------------------------------------------------------===// -// Move Instructions -let neverHasSideEffects = 1 in -def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movss\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +// Move Instructions. Register-to-register movss is not used for FR32 +// register copies because it's a partial register update; FsMOVAPSrr is +// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG +// because INSERT_SUBREG requires that the insert be implementable in terms of +// a copy, and just mentioned, we don't use movss for copies. +let Constraints = "$src1 = $dst" in +def MOVSSrr : SSI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), + "movss\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>; + +// Extract the low 32-bit value from one vector and insert it into another. +let AddedComplexity = 15 in +def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), + (MOVSSrr VR128:$src1, + (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>; + +// Implicitly promote a 32-bit scalar to a vector. +def : Pat<(v4f32 (scalar_to_vector FR32:$src)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>; + +// Loading from memory automatically zeroing upper bits. +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), "movss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (loadf32 addr:$src))]>; + +// MOVSSrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. 
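The nontemporalstore/alignednontemporalstore fragments introduced at the top of the X86InstrSSE.td hunk select movnti/movntps/movntdq for stores whose node carries the non-temporal hint. At the source level that hint typically comes from the streaming-store intrinsics; a small C++ sketch using the standard SSE intrinsics follows, with the 16-byte alignment requirement of movntps being the relevant detail.

    #include <xmmintrin.h>   // SSE: _mm_set1_ps, _mm_stream_ps, _mm_sfence

    // Illustrative sketch: fill a 16-byte-aligned float buffer with streaming
    // (non-temporal) stores. _mm_stream_ps lowers to movntps, the instruction
    // the alignednontemporalstore fragment matches; _mm_sfence then orders the
    // streaming stores against later ordinary stores.
    void fill_streaming(float *dst /* 16-byte aligned */,
                        int n /* multiple of 4 */, float value) {
      __m128 v = _mm_set1_ps(value);
      for (int i = 0; i < n; i += 4)
        _mm_stream_ps(dst + i, v);
      _mm_sfence();
    }
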
+let AddedComplexity = 20 in { +def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), + (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>; +} + +// Store scalar value to memory. def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; +// Extract and store. +def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSSmr addr:$dst, + (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; + // Conversion instructions def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), "cvttss2si\t{$src, $dst|$dst, $src}", @@ -518,7 +582,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), // Alias instruction to load FR32 from f128mem using movaps. Upper bits are // disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; @@ -715,7 +779,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin, let neverHasSideEffects = 1 in def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>; @@ -727,7 +791,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), let neverHasSideEffects = 1 in def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv4f32 addr:$src))]>; @@ -736,7 +800,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v4f32 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS load and store -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; @@ -762,6 +826,9 @@ let Constraints = "$src1 = $dst" in { } // Constraints = "$src1 = $dst" +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm VR128:$src1, addr:$src2)>; + def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -793,9 +860,9 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), let AddedComplexity = 20 in { def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (MOVLHPSrr VR128:$src, VR128:$src)>; def : Pat<(v2i64 (movddup 
VR128:$src, (undef))), - (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (MOVLHPSrr VR128:$src, VR128:$src)>; } @@ -1010,10 +1077,33 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; // Non-temporal stores -def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), +def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQ_64mr VR128:$src, addr:$dst)>; + +def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; + +def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; +} + // Load, store, and memory fence def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>; @@ -1032,84 +1122,73 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllZerosV))]>; -let Predicates = [HasSSE1] in { - def : Pat<(v2i64 immAllZerosV), (V_SET0)>; - def : Pat<(v8i16 immAllZerosV), (V_SET0)>; - def : Pat<(v16i8 immAllZerosV), (V_SET0)>; - def : Pat<(v2f64 immAllZerosV), (V_SET0)>; - def : Pat<(v4f32 immAllZerosV), (V_SET0)>; -} - -// FR32 to 128-bit vector conversion. -let isAsCheapAsAMove = 1 in -def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector FR32:$src)))]>; -def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>; - -// FIXME: may not be able to eliminate this movss with coalescing the src and -// dest register classes are different. We really want to write this pattern -// like this: -// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), -// (f32 FR32:$src)>; -let isAsCheapAsAMove = 1 in -def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (vector_extract (v4f32 VR128:$src), - (iPTR 0)))]>; -def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), - "movss\t{$src, $dst|$dst, $src}", - [(store (f32 (vector_extract (v4f32 VR128:$src), - (iPTR 0))), addr:$dst)]>; - - -// Move to lower bits of a VR128, leaving upper bits alone. -// Three operand (but two address) aliases. 
-let Constraints = "$src1 = $dst" in { -let neverHasSideEffects = 1 in - def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", []>; - - let AddedComplexity = 15 in - def MOVLPSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "movss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movl VR128:$src1, VR128:$src2)))]>; -} +def : Pat<(v2i64 immAllZerosV), (V_SET0)>; +def : Pat<(v8i16 immAllZerosV), (V_SET0)>; +def : Pat<(v16i8 immAllZerosV), (V_SET0)>; +def : Pat<(v2f64 immAllZerosV), (V_SET0)>; +def : Pat<(v4f32 immAllZerosV), (V_SET0)>; -// Move to lower bits of a VR128 and zeroing upper bits. -// Loading from memory automatically zeroing upper bits. -let AddedComplexity = 20 in -def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector - (loadf32 addr:$src))))))]>; - -def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), - (MOVZSS2PSrm addr:$src)>; +def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>; //===---------------------------------------------------------------------===// // SSE2 Instructions //===---------------------------------------------------------------------===// -// Move Instructions -let neverHasSideEffects = 1 in -def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +// Move Instructions. Register-to-register movsd is not used for FR64 +// register copies because it's a partial register update; FsMOVAPDrr is +// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG +// because INSERT_SUBREG requires that the insert be implementable in terms of +// a copy, and just mentioned, we don't use movsd for copies. +let Constraints = "$src1 = $dst" in +def MOVSDrr : SDI<0x10, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), + "movsd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>; + +// Extract the low 64-bit value from one vector and insert it into another. +let AddedComplexity = 15 in +def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, + (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>; + +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>; + +// Loading from memory automatically zeroing upper bits. +let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), "movsd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (loadf64 addr:$src))]>; + +// MOVSDrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. 
+let AddedComplexity = 20 in { +def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>; +} + +// Store scalar value to memory. def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; +// Extract and store. +def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + // Conversion instructions def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), "cvttsd2si\t{$src, $dst|$dst, $src}", @@ -1163,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), Requires<[HasSSE2, OptForSize]>; def : Pat<(extloadf32 addr:$src), - (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>; + (CVTSS2SDrr (MOVSSrm addr:$src))>, + Requires<[HasSSE2, OptForSpeed]>; // Match intrinsics which expect XMM operand(s). def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), @@ -1282,7 +1362,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR64 from f128mem using movapd. Upper bits are // disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; @@ -1480,7 +1560,7 @@ defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, let neverHasSideEffects = 1 in def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>; @@ -2295,34 +2375,47 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; // Non-temporal stores -def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; -def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; -def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), +def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; +def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; +def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), 
(ins i32mem:$dst, GR32:$src), "movnti\t{$src, $dst|$dst, $src}", [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, TB, Requires<[HasSSE2]>; +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; + +def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), + (MOVNTDQmr VR128:$src, addr:$dst)>; +} + // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, TB, Requires<[HasSSE2]>; // Load, store, and memory fence -def LFENCE : I<0xAE, MRM5r, (outs), (ins), +def LFENCE : I<0xAE, MRM_E8, (outs), (ins), "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM6r, (outs), (ins), +def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; //TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 0)), (NOOP)>; def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss), +def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)), (MFENCE)>; // Alias instructions that map zero vector to pxor / xorp* for sse. @@ -2334,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; -// FR64 to 128-bit vector conversion. -let isAsCheapAsAMove = 1 in -def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (scalar_to_vector FR64:$src)))]>; -def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>; - def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2373,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)]>; -// FIXME: may not be able to eliminate this movss with coalescing the src and -// dest register classes are different. 
We really want to write this pattern -// like this: -// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), -// (f32 FR32:$src)>; -let isAsCheapAsAMove = 1 in -def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (vector_extract (v2f64 VR128:$src), - (iPTR 0)))]>; -def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), - (iPTR 0))), addr:$dst)]>; +def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>; + def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2403,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; - -// Move to lower bits of a VR128, leaving upper bits alone. -// Three operand (but two address) aliases. -let Constraints = "$src1 = $dst" in { - let neverHasSideEffects = 1 in - def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", []>; - - let AddedComplexity = 15 in - def MOVLPDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (movl VR128:$src1, VR128:$src2)))]>; -} - // Store / copy lower 64-bits of a XMM register. def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; -// Move to lower bits of a VR128 and zeroing upper bits. -// Loading from memory automatically zeroing upper bits. -let AddedComplexity = 20 in { -def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2f64 (X86vzmovl (v2f64 (scalar_to_vector - (loadf64 addr:$src))))))]>; - -def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (MOVZSD2PDrm addr:$src)>; -def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (MOVZSD2PDrm addr:$src)>; -def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>; -} - // movd / movq to XMM register zero-extends let AddedComplexity = 15 in { def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), @@ -2613,9 +2651,9 @@ let Constraints = "$src1 = $dst" in { } // Thread synchronization -def MONITOR : I<0x01, MRM1r, (outs), (ins), "monitor", +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM1r, (outs), (ins), "mwait", +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; // vector_shuffle v1, <undef> <1, 1, 3, 3> @@ -2986,13 +3024,15 @@ let Predicates = [HasSSE2] in { let AddedComplexity = 15 in { // Zeroing a VR128 then do a MOVS{S|D} to the lower bits. 
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>; + (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), - (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4f32 (V_SET0)), + (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>; + (MOVSSrr (v4i32 (V_SET0)), + (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>; } // Splat v2f64 / v2i64 @@ -3010,8 +3050,7 @@ def : Pat<(unpckh (v2i64 VR128:$src), (undef)), // Special unary SHUFPSrri case. def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))), (SHUFPSrri VR128:$src1, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, - Requires<[HasSSE1]>; + (SHUFFLE_get_shuf_imm VR128:$src3))>; let AddedComplexity = 5 in def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))), (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>, @@ -3057,13 +3096,13 @@ def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))), } let AddedComplexity = 10 in { def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))), - (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (UNPCKLPSrr VR128:$src, VR128:$src)>; def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLBWrr VR128:$src, VR128:$src)>; def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLWDrr VR128:$src, VR128:$src)>; def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))), - (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKLDQrr VR128:$src, VR128:$src)>; } // vector_shuffle v1, <undef>, <2, 2, 3, 3, ...> @@ -3077,13 +3116,13 @@ def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))), } let AddedComplexity = 10 in { def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))), - (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>; + (UNPCKHPSrr VR128:$src, VR128:$src)>; def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHBWrr VR128:$src, VR128:$src)>; def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHWDrr VR128:$src, VR128:$src)>; def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), - (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>; + (PUNPCKHDQrr VR128:$src, VR128:$src)>; } let AddedComplexity = 20 in { @@ -3105,45 +3144,49 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; + (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPSrm VR128:$src1, addr:$src2)>; def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), - (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; + (MOVLPDrm VR128:$src1, 
addr:$src2)>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVLPDmr addr:$src1, VR128:$src2)>; def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), - (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVLPDmr addr:$src1, VR128:$src2)>; let AddedComplexity = 15 in { // Setting the lowest element in the vector. def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)), - (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSSrr (v4i32 VR128:$src1), + (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>; def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr (v2i64 VR128:$src1), + (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>; -// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd) +// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + Requires<[HasSSE2]>; def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)), - (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>; + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>, + Requires<[HasSSE2]>; } // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but // fall back to this for SSE1) def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))), (SHUFPSrri VR128:$src2, VR128:$src1, - (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>; + (SHUFFLE_get_shuf_imm VR128:$src3))>; // Set lowest element and zero upper elements. let AddedComplexity = 15 in @@ -3185,30 +3228,30 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), // Use movaps / movups for SSE integer load / store (one byte shorter). 
def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>; + (MOVAPSrm addr:$src)>; def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>; + (MOVUPSrm addr:$src)>; def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>; + (MOVAPSrm addr:$src)>; def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>; + (MOVUPSrm addr:$src)>; def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVAPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>; + (MOVUPSmr addr:$dst, VR128:$src)>; //===----------------------------------------------------------------------===// // SSE4.1 Instructions @@ -3397,7 +3440,7 @@ let Constraints = "$src1 = $dst" in { (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR128:$dst, - (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize; + (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize; def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index d3b0052..250634f 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -55,6 +55,11 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { if (!is64Bit) Data64bitsDirective = 0; // we can't emit a 64-bit unit + // Use ## as a comment string so that .s files generated by llvm can go + // through the GCC preprocessor without causing an error. This is needed + // because "clang foo.s" runs the C preprocessor, which is usually reserved + // for .S files on other systems. Perhaps this is because the file system + // wasn't always case preserving or something. 
CommentString = "##"; PCSymbol = "."; @@ -70,6 +75,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; + TextAlignFillValue = 0x90; + PrivateGlobalPrefix = ".L"; WeakRefDirective = "\t.weak\t"; PCSymbol = "."; @@ -94,27 +101,6 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { AsmTransCBE = x86_asm_table; AssemblerDialect = AsmWriterFlavor; -} - - -X86WinMCAsmInfo::X86WinMCAsmInfo(const Triple &Triple) { - AsmTransCBE = x86_asm_table; - AssemblerDialect = AsmWriterFlavor; - GlobalPrefix = "_"; - CommentString = ";"; - - PrivateGlobalPrefix = "$"; - AlignDirective = "\tALIGN\t"; - ZeroDirective = "\tdb\t"; - AsciiDirective = "\tdb\t"; - AscizDirective = 0; - Data8bitsDirective = "\tdb\t"; - Data16bitsDirective = "\tdw\t"; - Data32bitsDirective = "\tdd\t"; - Data64bitsDirective = "\tdq\t"; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - - AlignmentIsInBytes = true; + TextAlignFillValue = 0x90; } diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index ca227b7..69716bf 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -33,11 +33,6 @@ namespace llvm { struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { explicit X86MCAsmInfoCOFF(const Triple &Triple); }; - - struct X86WinMCAsmInfo : public MCAsmInfo { - explicit X86WinMCAsmInfo(const Triple &Triple); - }; - } // namespace llvm #endif diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 764c87a..3f18696 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -14,53 +14,44 @@ #define DEBUG_TYPE "x86-emitter" #include "X86.h" #include "X86InstrInfo.h" +#include "X86FixupKinds.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -// FIXME: This should move to a header. 
-namespace llvm { -namespace X86 { -enum Fixups { - reloc_pcrel_word = FirstTargetFixupKind, - reloc_picrel_word, - reloc_absolute_word, - reloc_absolute_word_sext, - reloc_absolute_dword -}; -} -} - namespace { class X86MCCodeEmitter : public MCCodeEmitter { X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT const TargetMachine &TM; const TargetInstrInfo &TII; + MCContext &Ctx; bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, bool is64Bit) - : TM(tm), TII(*TM.getInstrInfo()) { + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { Is64BitMode = is64Bit; } ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 5; + return 3; } - MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_word", 0, 4 * 8 }, - { "reloc_picrel_word", 0, 4 * 8 }, - { "reloc_absolute_word", 0, 4 * 8 }, - { "reloc_absolute_word_sext", 0, 4 * 8 }, - { "reloc_absolute_dword", 0, 8 * 8 } + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[] = { + { "reloc_pcrel_4byte", 0, 4 * 8 }, + { "reloc_pcrel_1byte", 0, 1 * 8 }, + { "reloc_riprel_4byte", 0, 4 * 8 } }; + + if (Kind < FirstTargetFixupKind) + return MCCodeEmitter::getFixupKindInfo(Kind); - assert(Kind >= FirstTargetFixupKind && Kind < MaxTargetFixupKind && + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } @@ -83,9 +74,11 @@ public: } } - void EmitDisplacementField(const MCOperand &Disp, int64_t Adj, bool IsPCRel, - unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; + void EmitImmediate(const MCOperand &Disp, + unsigned ImmSize, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + int ImmOffset = 0) const; inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { @@ -106,8 +99,8 @@ public: void EmitMemModRMByte(const MCInst &MI, unsigned Op, - unsigned RegOpcodeField, intptr_t PCAdj, - unsigned &CurByte, raw_ostream &OS, + unsigned RegOpcodeField, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -119,13 +112,15 @@ public: MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(TM, false); + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, false); } MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, - TargetMachine &TM) { - return new X86MCCodeEmitter(TM, true); + TargetMachine &TM, + MCContext &Ctx) { + return new X86MCCodeEmitter(TM, Ctx, true); } @@ -135,36 +130,59 @@ static bool isDisp8(int Value) { return Value == (signed char)Value; } +/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate +/// in an instruction with the specified TSFlags. +static MCFixupKind getImmFixupKind(unsigned TSFlags) { + unsigned Size = X86II::getSizeOfImm(TSFlags); + bool isPCRel = X86II::isImmPCRel(TSFlags); + + switch (Size) { + default: assert(0 && "Unknown immediate size"); + case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 4: return isPCRel ? 
MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; + case 2: assert(!isPCRel); return FK_Data_2; + case 8: assert(!isPCRel); return FK_Data_8; + } +} + + void X86MCCodeEmitter:: -EmitDisplacementField(const MCOperand &DispOp, int64_t Adj, bool IsPCRel, - unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { +EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const { // If this is a simple integer displacement that doesn't require a relocation, // emit it now. if (DispOp.isImm()) { - EmitConstant(DispOp.getImm(), 4, CurByte, OS); + // FIXME: is this right for pc-rel encoding?? Probably need to emit this as + // a fixup if so. + EmitConstant(DispOp.getImm()+ImmOffset, Size, CurByte, OS); return; } -#if 0 - // Otherwise, this is something that requires a relocation. Emit it as such - // now. - unsigned RelocType = Is64BitMode ? - (IsPCRel ? X86::reloc_pcrel_word : X86::reloc_absolute_word_sext) - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); -#endif + // If we have an immoffset, add it to the expression. + const MCExpr *Expr = DispOp.getExpr(); + + // If the fixup is pc-relative, we need to bias the value to be relative to + // the start of the field, not the end of the field. + if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) + ImmOffset -= 1; + + if (ImmOffset) + Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), + Ctx); // Emit a symbolic constant as a fixup and 4 zeros. - Fixups.push_back(MCFixup::Create(CurByte, DispOp.getExpr(), - MCFixupKind(X86::reloc_absolute_word))); - EmitConstant(0, 4, CurByte, OS); + Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); + EmitConstant(0, Size, CurByte, OS); } void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - intptr_t PCAdj, - unsigned &CurByte, + unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const{ const MCOperand &Disp = MI.getOperand(Op+3); @@ -172,31 +190,48 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, const MCOperand &Scale = MI.getOperand(Op+1); const MCOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); - - // FIXME: Eliminate! - bool IsPCRel = false; + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + + // rip-relative addressing is actually relative to the *next* instruction. + // Since an immediate can follow the mod/rm byte for an instruction, this + // means that we need to bias the immediate field of the instruction with + // the size of the immediate field. If we have this case, add it into the + // expression to emit. + int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; + EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + CurByte, OS, Fixups, -ImmSize); + return; + } + + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; + // Determine whether a SIB byte is needed. // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. 
+ if (// The SIB byte must be used if there is an index register. IndexReg.getReg() == 0 && - // The SIB byte must be used if the base is ESP/RSP. - BaseReg != X86::ESP && BaseReg != X86::RSP && + // The SIB byte must be used if the base is ESP/RSP/R12, all of which + // encode to an R/M value of 4, which indicates that a SIB byte is + // present. + BaseRegNo != N86::ESP && // If there is no base register and we're in 64-bit mode, we need a SIB // byte to emit an addr that is just 'disp32' (the non-RIP relative form). (!Is64BitMode || BaseReg != 0)) { - if (BaseReg == 0 || // [disp32] in X86-32 mode - BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode + if (BaseReg == 0) { // [disp32] in X86-32 mode EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); - EmitDisplacementField(Disp, PCAdj, true, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - unsigned BaseRegNo = GetX86RegNum(Base); - // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -209,13 +244,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (Disp.isImm() && isDisp8(Disp.getImm())) { EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitConstant(Disp.getImm(), 1, CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); return; } // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } @@ -270,9 +305,9 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Do we need to output a displacement? if (ForceDisp8) - EmitConstant(Disp.getImm(), 1, CurByte, OS); + EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); else if (ForceDisp32 || Disp.getImm() != 0) - EmitDisplacementField(Disp, PCAdj, IsPCRel, CurByte, OS, Fixups); + EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); } /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 @@ -280,11 +315,11 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, /// size, and 3) use of X86-64 extended registers. static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, const TargetInstrDesc &Desc) { - unsigned REX = 0; - - // Pseudo instructions do not need REX prefix byte. + // Pseudo instructions never have a rex byte. if ((TSFlags & X86II::FormMask) == X86II::Pseudo) return 0; + + unsigned REX = 0; if (TSFlags & X86II::REX_W) REX |= 1 << 3; @@ -482,52 +517,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; - assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); - case X86II::RawFrm: { + assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); + case X86II::Pseudo: return; // Pseudo instructions encode to nothing. 
+ case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); - - if (CurOp == NumOps) - break; - - assert(0 && "Unimpl RawFrm expr"); break; - } - case X86II::AddRegFrm: { + case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); - if (CurOp == NumOps) - break; - - const MCOperand &MO1 = MI.getOperand(CurOp++); - if (MO1.isImm()) { - unsigned Size = X86II::getSizeOfImm(TSFlags); - EmitConstant(MO1.getImm(), Size, CurByte, OS); - break; - } - - assert(0 && "Unimpl AddRegFrm expr"); break; - } case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp), GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); CurOp += 2; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), - 0, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += X86AddrNumOperands + 1; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMSrcReg: @@ -535,9 +547,6 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); CurOp += 2; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; case X86II::MRMSrcMem: { @@ -551,117 +560,78 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else AddrOperands = X86AddrNumOperands; - // FIXME: What is this actually doing? - intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? - X86II::getSizeOfImm(TSFlags) : 0; - EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), - PCAdj, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += AddrOperands + 1; - if (CurOp != NumOps) - EmitConstant(MI.getOperand(CurOp++).getImm(), - X86II::getSizeOfImm(TSFlags), CurByte, OS); break; } case X86II::MRM0r: case X86II::MRM1r: case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: - case X86II::MRM6r: case X86II::MRM7r: { + case X86II::MRM6r: case X86II::MRM7r: EmitByte(BaseOpcode, CurByte, OS); - - // Special handling of lfence, mfence, monitor, and mwait. - // FIXME: This is terrible, they should get proper encoding bits in TSFlags. - if (Opcode == X86::LFENCE || Opcode == X86::MFENCE || - Opcode == X86::MONITOR || Opcode == X86::MWAIT) { - EmitByte(ModRMByte(3, (TSFlags & X86II::FormMask)-X86II::MRM0r, 0), - CurByte, OS); - - switch (Opcode) { - default: break; - case X86::MONITOR: EmitByte(0xC8, CurByte, OS); break; - case X86::MWAIT: EmitByte(0xC9, CurByte, OS); break; - } - } else { - EmitRegModRMByte(MI.getOperand(CurOp++), - (TSFlags & X86II::FormMask)-X86II::MRM0r, - CurByte, OS); - } - - if (CurOp == NumOps) - break; - - const MCOperand &MO1 = MI.getOperand(CurOp++); - if (MO1.isImm()) { - EmitConstant(MO1.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS); - break; - } - - assert(0 && "relo unimpl"); -#if 0 - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64ri32) - rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? 
- if (MO1.isGlobal()) { - bool Indirect = gvNeedsNonLazyPtr(MO1, TM); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - Indirect); - } else if (MO1.isSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isCPI()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJTI()) - emitJumpTableAddress(MO1.getIndex(), rt); + EmitRegModRMByte(MI.getOperand(CurOp++), + (TSFlags & X86II::FormMask)-X86II::MRM0r, + CurByte, OS); break; -#endif - } case X86II::MRM0m: case X86II::MRM1m: case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: - case X86II::MRM6m: case X86II::MRM7m: { - intptr_t PCAdj = 0; - if (CurOp + X86AddrNumOperands != NumOps) { - if (MI.getOperand(CurOp+X86AddrNumOperands).isImm()) - PCAdj = X86II::getSizeOfImm(TSFlags); - else - PCAdj = 4; - } - + case X86II::MRM6m: case X86II::MRM7m: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, - PCAdj, CurByte, OS, Fixups); + TSFlags, CurByte, OS, Fixups); CurOp += X86AddrNumOperands; - - if (CurOp == NumOps) - break; - - const MCOperand &MO = MI.getOperand(CurOp++); - if (MO.isImm()) { - EmitConstant(MO.getImm(), X86II::getSizeOfImm(TSFlags), CurByte, OS); - break; - } - - assert(0 && "relo not handled"); -#if 0 - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64mi32) - rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? - if (MO.isGlobal()) { - bool Indirect = gvNeedsNonLazyPtr(MO, TM); - emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - Indirect); - } else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), rt); - else if (MO.isCPI()) - emitConstPoolAddress(MO.getIndex(), rt); - else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), rt); -#endif + break; + case X86II::MRM_C1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC1, CurByte, OS); + break; + case X86II::MRM_C2: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC2, CurByte, OS); + break; + case X86II::MRM_C3: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC3, CurByte, OS); + break; + case X86II::MRM_C4: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC4, CurByte, OS); + break; + case X86II::MRM_C8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC8, CurByte, OS); + break; + case X86II::MRM_C9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xC9, CurByte, OS); + break; + case X86II::MRM_E8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xE8, CurByte, OS); + break; + case X86II::MRM_F0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF0, CurByte, OS); + break; + case X86II::MRM_F8: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF8, CurByte, OS); + break; + case X86II::MRM_F9: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xF9, CurByte, OS); break; } - } + + // If there is a remaining operand, it must be a trailing immediate. Emit it + // according to the right size for the instruction. + if (CurOp != NumOps) + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); #ifndef NDEBUG // FIXME: Verify. 
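The rewritten emitter above replaces the old EmitDisplacementField/EmitConstant tail-immediate handling with a single EmitImmediate that records MCFixups, and it pre-biases the fixup expression: pc-relative kinds subtract the size of the field itself (4 or 1 bytes), and a rip-relative displacement that is followed by an immediate additionally subtracts that immediate's size, because the CPU resolves the field against the address of the *next* instruction. The standalone sketch below only illustrates that arithmetic, with made-up addresses, offsets, and a generic "subtract the fixup's own address" resolver; it is not LLVM's MC API.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed layout of something like "cmp $imm32, sym(%rip)":
  // opcode + ModRM, then a 4-byte displacement, then a 4-byte immediate.
  const uint64_t InstrAddr   = 0x1000; // start of the instruction (assumed)
  const uint64_t FieldOffset = 3;      // disp32 begins 3 bytes in (assumed)
  const uint64_t FieldSize   = 4;      // reloc_riprel_4byte field
  const uint64_t ImmSize     = 4;      // trailing imm32
  const uint64_t InstrLen    = FieldOffset + FieldSize + ImmSize;
  const uint64_t Target      = 0x2000; // where 'sym' ends up (assumed)

  // What the hardware needs in the disp32 field: the target relative to the
  // *next* instruction.
  const int64_t WantedDisp = (int64_t)Target - (int64_t)(InstrAddr + InstrLen);

  // A generic pc-relative resolver subtracts the address of the fixup itself
  // (the start of the field), so the emitter folds the difference into the
  // expression: -FieldSize for the field, -ImmSize for the trailing immediate.
  const int64_t ImmOffset = -(int64_t)FieldSize - (int64_t)ImmSize;
  const int64_t Resolved  = ((int64_t)Target + ImmOffset)
                          - (int64_t)(InstrAddr + FieldOffset);

  assert(Resolved == WantedDisp && "bias reproduces the architectural disp32");
  std::printf("disp32 = %lld\n", (long long)Resolved);
  return 0;
}

With no trailing immediate the extra -ImmSize term simply drops out, which matches the -4-only bias applied to plain reloc_pcrel_4byte branch targets.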
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index bb53bf1..4b2529b 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -18,12 +18,6 @@ namespace llvm { -enum NameDecorationStyle { - None, - StdCall, - FastCall -}; - /// X86MachineFunctionInfo - This class is derived from MachineFunction and /// contains private X86 target-specific information for each MachineFunction. class X86MachineFunctionInfo : public MachineFunctionInfo { @@ -41,10 +35,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; - /// DecorationStyle - If the function requires additional name decoration, - /// DecorationStyle holds the right way to do so. - NameDecorationStyle DecorationStyle; - /// ReturnAddrIndex - FrameIndex for return slot. int ReturnAddrIndex; @@ -66,7 +56,6 @@ public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -76,7 +65,6 @@ public: : ForceFramePointer(false), CalleeSavedFrameSize(0), BytesToPopOnReturn(0), - DecorationStyle(None), ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), @@ -91,9 +79,6 @@ public: unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;} - NameDecorationStyle getDecorationStyle() const { return DecorationStyle; } - void setDecorationStyle(NameDecorationStyle style) { DecorationStyle = style;} - int getRAIndex() const { return ReturnAddrIndex; } void setRAIndex(int Index) { ReturnAddrIndex = Index; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 081c6d9..0f4ce37 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR16_NOREXRegClass; else if (A == &X86::GR16_ABCDRegClass) return &X86::GR16_ABCDRegClass; + } else if (B == &X86::FR32RegClass) { + return A; } break; case 2: @@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass || A == &X86::GR16_NOREXRegClass) return &X86::GR16_ABCDRegClass; + } else if (B == &X86::FR64RegClass) { + return A; } break; case 3: @@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return &X86::GR32_NOREXRegClass; else if (A == &X86::GR32_ABCDRegClass) return &X86::GR64_ABCDRegClass; + } else if (B == &X86::VR128RegClass) { + return A; } break; case 4: @@ -446,8 +452,10 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *F = MF.getFunction(); bool requiresRealignment = - RealignStack && (MFI->getMaxAlignment() > StackAlign); + RealignStack && ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttr(Attribute::StackAlignment)); // FIXME: Currently we don't support stack realignment for functions with // variable-sized allocas. 
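The new FR32/FR64/VR128 cases in getMatchingSuperRegClass tie into the scalar sub-register indices defined a little further down (x86_subreg_ss/sd/xmm): an FR32 or FR64 value is simply the low bytes of an XMM register, which is why the EXTRACT_SUBREG rewrites earlier in X86InstrSSE.td can read the low lane without a movss/movsd copy. As a rough mental model only (plain C++, type-punning through a union purely for illustration, nothing LLVM-specific):

#include <cstdio>

// One 128-bit register, viewed either as a v2f64 vector (VR128) or as its
// low scalar (FR64 = x86_subreg_sd, FR32 = x86_subreg_ss).
union XmmView {
  double v2f64[2];
  double fr64;   // overlaps v2f64[0]
  float  fr32;   // overlaps the low 4 bytes
};

int main() {
  XmmView x{};
  x.v2f64[0] = 3.5;   // write lane 0 of the vector
  x.v2f64[1] = 7.0;
  // "Extracting" the scalar is just re-labeling the same storage; no
  // register-to-register move is needed, which is what EXTRACT_SUBREG with
  // x86_subreg_sd expresses to the register allocator.
  std::printf("low f64 lane = %f\n", x.fr64);   // 3.5
  return 0;
}

The same picture covers the VR256 change below: a YMM register's low half is its XMM sub-register (x86_subreg_xmm), so VR256's SubRegClassList lists FR32, FR64 and VR128.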
@@ -485,7 +493,7 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const { Offset += SlotSize; } else { unsigned Align = MFI->getObjectAlignment(FI); - assert( (-(Offset + StackSize)) % Align == 0); + assert((-(Offset + StackSize)) % Align == 0); Align = 0; return Offset + StackSize; } @@ -627,10 +635,6 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - // Calculate and set max stack object alignment early, so we can decide - // whether we will need stack realignment (and thus FP). - MFI->calculateMaxStackAlignment(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); @@ -1053,7 +1057,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); + .addExternalSymbol("_alloca") + .addReg(StackPtr, RegState::Define | RegState::Implicit); } else { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) @@ -1064,7 +1069,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) .addImm(NumBytes - 4); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); + .addExternalSymbol("_alloca") + .addReg(StackPtr, RegState::Define | RegState::Implicit); // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 8fb5e92..e4bdb4e 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -35,7 +35,8 @@ namespace X86 { /// these indices must be kept in sync with the class indices in the /// X86RegisterInfo.td file. 
enum SubregIndex { - SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4 + SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4, + SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3 }; } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 1559bf7..ed2ce6c 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -158,22 +158,22 @@ let Namespace = "X86" in { def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; // YMM Registers, used by AVX instructions - def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>; - def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>; - def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>; - def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>; - def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>; - def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>; - def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>; - def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>; - def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>; - def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>; - def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>; - def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>; - def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>; - def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>; - def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>; - def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>; + def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>; + def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>; + def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>; + def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>; + def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>; + def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>; + def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>; + def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>; + def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>; + def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>; + def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>; + def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>; + def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>; + def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>; + def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>; + def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>; // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; @@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>; def x86_subreg_16bit : PatLeaf<(i32 3)>; def x86_subreg_32bit : PatLeaf<(i32 4)>; +def x86_subreg_ss : PatLeaf<(i32 1)>; +def x86_subreg_sd : PatLeaf<(i32 2)>; +def x86_subreg_xmm : PatLeaf<(i32 3)>; + def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W], [AL, CL, DL, BL, SPL, BPL, SIL, DIL, @@ -277,11 +281,31 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; -def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, +def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, 
YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; +def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + +def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is @@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { + let SubRegClassList = [FR32, FR64]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, - YMM12, YMM13, YMM14, YMM15]>; + YMM12, YMM13, YMM14, YMM15]> { + let SubRegClassList = [FR32, FR64, VR128]; +} // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 618dd10..594a470 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -20,9 +20,9 @@ namespace llvm { class GlobalValue; class TargetMachine; - + /// PICStyles - The X86 backend supports a number of different styles of PIC. -/// +/// namespace PICStyles { enum Style { StubPIC, // Used on i386-darwin in -fPIC mode. @@ -46,7 +46,7 @@ protected: /// PICStyle - Which PIC style to use /// PICStyles::Style PICStyle; - + /// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or /// none supported. X86SSEEnum X86SSELevel; @@ -58,7 +58,7 @@ protected: /// HasCMov - True if this processor has conditional move instructions /// (generally pentium pro+). bool HasCMov; - + /// HasX86_64 - True if the processor supports X86-64 instructions. /// bool HasX86_64; @@ -78,8 +78,9 @@ protected: /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; - /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands. - /// This may require setting a feature bit in the processor. + /// HasVectorUAMem - True if SIMD operations can have unaligned memory + /// operands. This may require setting a feature bit in the + /// processor. 
bool HasVectorUAMem; /// DarwinVers - Nonzero if this is a darwin platform: the numeric @@ -150,20 +151,20 @@ public: bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } - + bool isTargetWindows() const { return TargetType == isWindows; } bool isTargetMingw() const { return TargetType == isMingw; } bool isTargetCygwin() const { return TargetType == isCygwin; } bool isTargetCygMing() const { return TargetType == isMingw || TargetType == isCygwin; } - + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. bool isTargetCOFF() const { return TargetType == isMingw || TargetType == isCygwin || TargetType == isWindows; } - + bool isTargetWin64() const { return Is64Bit && (TargetType == isMingw || TargetType == isWindows); } @@ -175,7 +176,7 @@ public: else if (isTargetDarwin()) p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else if (isTargetMingw() || isTargetWindows()) - p = "e-p:32:32-f64:64:64-i64:64:64-f80:128:128-n8:16:32"; + p = "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32"; else p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; @@ -196,11 +197,11 @@ public: bool isPICStyleStubAny() const { return PICStyle == PICStyles::StubDynamicNoPIC || PICStyle == PICStyles::StubPIC; } - + /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, /// 10 = Snow Leopard, etc. unsigned getDarwinVers() const { return DarwinVers; } - + /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f835e29..56ddaf8 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -30,9 +30,8 @@ static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { case Triple::MinGW32: case Triple::MinGW64: case Triple::Cygwin: - return new X86MCAsmInfoCOFF(TheTriple); case Triple::Win32: - return new X86WinMCAsmInfo(TheTriple); + return new X86MCAsmInfoCOFF(TheTriple); default: return new X86ELFMCAsmInfo(TheTriple); } @@ -48,11 +47,16 @@ extern "C" void LLVMInitializeX86Target() { RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo); // Register the code emitter. - // FIXME: Remove the heinous one when the new one works. TargetRegistry::RegisterCodeEmitter(TheX86_32Target, - createHeinousX86MCCodeEmitter); + createX86_32MCCodeEmitter); TargetRegistry::RegisterCodeEmitter(TheX86_64Target, - createHeinousX86MCCodeEmitter); + createX86_64MCCodeEmitter); + + // Register the asm backend. + TargetRegistry::RegisterAsmBackend(TheX86_32Target, + createX86_32AsmBackend); + TargetRegistry::RegisterAsmBackend(TheX86_64Target, + createX86_64AsmBackend); } @@ -201,32 +205,3 @@ void X86TargetMachine::setCodeModelForJIT() { else setCodeModel(CodeModel::Small); } - -/// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are 4-byte, -/// 8-byte, and target default. The CIE is hard-coded to indicate that the LSDA -/// pointer in the FDE section is an "sdata4", and should be encoded as a 4-byte -/// pointer by default. However, some systems may require a different size due -/// to bugs or other conditions. We will default to a 4-byte encoding unless the -/// system tells us otherwise. -/// -/// The issue is when the CIE says their is an LSDA. That mandates that every -/// FDE have an LSDA slot. But if the function does not need an LSDA. 
There -/// needs to be some way to signify there is none. The LSDA is encoded as -/// pc-rel. But you don't look for some magic value after adding the pc. You -/// have to look for a zero before adding the pc. The problem is that the size -/// of the zero to look for depends on the encoding. The unwinder bug in SL is -/// that it always checks for a pointer-size zero. So on x86_64 it looks for 8 -/// bytes of zero. If you have an LSDA, it works fine since the 8-bytes are -/// non-zero so it goes ahead and then reads the value based on the encoding. -/// But if you use sdata4 and there is no LSDA, then the test for zero gives a -/// false negative and the unwinder thinks there is an LSDA. -/// -/// FIXME: This call-back isn't good! We should be using the correct encoding -/// regardless of the system. However, there are some systems which have bugs -/// that prevent this from occuring. -DwarfLSDAEncoding::Encoding X86TargetMachine::getLSDAEncoding() const { - if (Subtarget.isTargetDarwin() && Subtarget.getDarwinVers() != 10) - return DwarfLSDAEncoding::Default; - - return DwarfLSDAEncoding::EightByte; -} diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index eee29be..2bb5454 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -62,18 +62,6 @@ public: return Subtarget.isTargetELF() ? &ELFWriterInfo : 0; } - /// getLSDAEncoding - Returns the LSDA pointer encoding. The choices are - /// 4-byte, 8-byte, and target default. The CIE is hard-coded to indicate that - /// the LSDA pointer in the FDE section is an "sdata4", and should be encoded - /// as a 4-byte pointer by default. However, some systems may require a - /// different size due to bugs or other conditions. We will default to a - /// 4-byte encoding unless the system tells us otherwise. - /// - /// FIXME: This call-back isn't good! We should be using the correct encoding - /// regardless of the system. However, there are some systems which have bugs - /// that prevent this from occuring. - virtual DwarfLSDAEncoding::Encoding getLSDAEncoding() const; - // Set up the pass pipeline. virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index b8cef7d..29a0be5 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -7,61 +7,112 @@ // //===----------------------------------------------------------------------===// -#include "X86TargetObjectFile.h" #include "X86MCTargetExpr.h" +#include "X86TargetObjectFile.h" +#include "X86TargetMachine.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/Mangler.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/Dwarf.h" using namespace llvm; +using namespace dwarf; -const MCExpr *X8632_MachoTargetObjectFile:: +const MCExpr *X8664_MachoTargetObjectFile:: getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - // The mach-o version of this method defaults to returning a stub reference. - IsIndirect = true; - IsPCRel = false; - - - MachineModuleInfoMachO &MachOMMI = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - - // FIXME: Use GetSymbolWithGlobalValueBase. 
- SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += "$non_lazy_ptr"; - - // Add information about the stub reference to MachOMMI so that the stub gets - // emitted by the asmprinter. - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str()); - MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym); - if (StubSym == 0) { - Name.clear(); + MachineModuleInfo *MMI, unsigned Encoding) const { + + // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which + // is an indirect pc-relative reference. + if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + SmallString<128> Name; Mang->getNameWithPrefix(Name, GV, false); - StubSym = getContext().GetOrCreateSymbol(Name.str()); + const MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + const MCExpr *Res = + X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); + const MCExpr *Four = MCConstantExpr::Create(4, getContext()); + return MCBinaryExpr::CreateAdd(Res, Four, getContext()); } - - return MCSymbolRefExpr::Create(Sym, getContext()); + + return TargetLoweringObjectFileMachO:: + getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding); } -const MCExpr *X8664_MachoTargetObjectFile:: -getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const { - - // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which - // is an indirect pc-relative reference. - IsIndirect = true; - IsPCRel = true; - - // FIXME: Use GetSymbolWithGlobalValueBase. - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, false); - const MCSymbol *Sym = getContext().CreateSymbol(Name); - const MCExpr *Res = - X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext()); - const MCExpr *Four = MCConstantExpr::Create(4, getContext()); - return MCBinaryExpr::CreateAdd(Res, Four, getContext()); +unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8632_ELFTargetObjectFile::getTTypeEncoding() const { + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; + else + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getPersonalityEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small ? 
+ DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} + +unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small || Model == CodeModel::Medium) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; } +unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { + CodeModel::Model Model = TM.getCodeModel(); + if (TM.getRelocationModel() == Reloc::PIC_) + return DW_EH_PE_indirect | DW_EH_PE_pcrel | (Model == CodeModel::Small || + Model == CodeModel::Medium ? + DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + + if (Model == CodeModel::Small) + return DW_EH_PE_udata4; + + return DW_EH_PE_absptr; +} diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 377a93b..0444417 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -10,21 +10,13 @@ #ifndef LLVM_TARGET_X86_TARGETOBJECTFILE_H #define LLVM_TARGET_X86_TARGETOBJECTFILE_H +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLoweringObjectFile.h" namespace llvm { - - /// X8632_MachoTargetObjectFile - This TLOF implementation is used for - /// Darwin/x86-32. - class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { - public: - - virtual const MCExpr * - getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; - }; - + class X86TargetMachine; + /// X8664_MachoTargetObjectFile - This TLOF implementation is used for /// Darwin/x86-64. 
class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { @@ -32,9 +24,31 @@ namespace llvm { virtual const MCExpr * getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - bool &IsIndirect, bool &IsPCRel) const; + MachineModuleInfo *MMI, unsigned Encoding) const; + }; + + class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8632_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { } + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; + }; + + class X8664_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + const X86TargetMachine &TM; + public: + X8664_ELFTargetObjectFile(const X86TargetMachine &tm) + :TM(tm) { } + virtual unsigned getPersonalityEncoding() const; + virtual unsigned getLSDAEncoding() const; + virtual unsigned getFDEEncoding() const; + virtual unsigned getTTypeEncoding() const; }; + } // end namespace llvm #endif diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index d18f55d..82e23a1 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" @@ -62,6 +63,11 @@ namespace { } void printMemOperand(const MachineInstr *MI, int opNum); + void printInlineJT(const MachineInstr *MI, int opNum, + const std::string &directive = ".jmptable"); + void printInlineJT32(const MachineInstr *MI, int opNum) { + printInlineJT(MI, opNum, ".jmptable32"); + } void printOperand(const MachineInstr *MI, int opNum); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode); @@ -257,6 +263,23 @@ void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum) printOperand(MI, opNum+1); } +void XCoreAsmPrinter:: +printInlineJT(const MachineInstr *MI, int opNum, const std::string &directive) +{ + unsigned JTI = MI->getOperand(opNum).getIndex(); + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + O << "\t" << directive << " "; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (i > 0) + O << ","; + O << *MBB->getSymbol(OutContext); + } +} + void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 383fd91..b1ab132 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -65,8 +65,6 @@ namespace { bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); - virtual void InstructionSelect(); - virtual const char *getPassName() const { return "XCore DAG->DAG Pattern Instruction Selection"; } @@ -147,15 +145,6 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr, return false; } -/// 
InstructionSelect - This callback is invoked by -/// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void XCoreDAGToDAGISel::InstructionSelect() { - // Select target instructions for the DAG. - SelectRoot(*CurDAG); - - CurDAG->RemoveDeadNodes(); -} - SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); EVT NVT = N->getValueType(0); @@ -164,7 +153,11 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { default: break; case ISD::Constant: { if (Predicate_immMskBitp(N)) { - SDValue MskSize = Transform_msksize_xform(N); + // Transformation function: get the size of a mask + int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue(); + assert(isMask_32(MaskVal)); + // Look for the first non-zero bit + SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal)); return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, MVT::i32, MskSize); } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index bf8c38f..e6515d8 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/ValueTypes.h" @@ -53,6 +54,8 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::RETSP : return "XCoreISD::RETSP"; case XCoreISD::LADD : return "XCoreISD::LADD"; case XCoreISD::LSUB : return "XCoreISD::LSUB"; + case XCoreISD::BR_JT : return "XCoreISD::BR_JT"; + case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32"; default : return NULL; } } @@ -106,9 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::TRAP, MVT::Other, Legal); - // Expand jump tables for now - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); + // Jump tables. 
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); @@ -157,7 +159,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -315,14 +317,27 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) +LowerBR_JT(SDValue Op, SelectionDAG &DAG) { - // FIXME there isn't really debug info here + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + unsigned JTI = JT->getIndex(); + MachineFunction &MF = DAG.getMachineFunction(); + const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); + + unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size(); + if (NumEntries <= 32) { + return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index); + } + assert((NumEntries >> 31) == 0); + SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT, + ScaledIndex); } static bool @@ -390,7 +405,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) if (Offset % 4 == 0) { // We've managed to infer better alignment information than the load // already has. Use an aligned load. - return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4); + // + // FIXME: No new alignment information is actually passed here. + // Should the offset really be 4? 
+ // + return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4, + false, false, 0); } // Lower to // ldw low, base[offset >> 2] @@ -407,9 +427,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset); SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain, - LowAddr, NULL, 4); + LowAddr, NULL, 4, false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), dl, Chain, - HighAddr, NULL, 4); + HighAddr, NULL, 4, false, false, 0); SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted); @@ -423,12 +443,13 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) int SVOffset = LD->getSrcValueOffset(); SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, - LD->isVolatile(), 2); + LD->isVolatile(), LD->isNonTemporal(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain, HighAddr, LD->getSrcValue(), SVOffset + 2, - MVT::i16, LD->isVolatile(), 2); + MVT::i16, LD->isVolatile(), + LD->isNonTemporal(), 2); SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, DAG.getConstant(16, MVT::i32)); SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted); @@ -452,7 +473,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -487,12 +508,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) DAG.getConstant(16, MVT::i32)); SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr, ST->getSrcValue(), SVOffset, MVT::i16, - ST->isVolatile(), 2); + ST->isVolatile(), ST->isNonTemporal(), + 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr, ST->getSrcValue(), SVOffset + 2, - MVT::i16, ST->isVolatile(), 2); + MVT::i16, ST->isVolatile(), + ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } @@ -513,7 +536,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) false, false, 0, CallingConv::C, false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol("__misaligned_store", getPointerTy()), - Args, DAG, dl, DAG.GetOrdering(Chain.getNode())); + Args, DAG, dl); return CallResult.second; } @@ -561,15 +584,16 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); EVT VT = Node->getValueType(0); SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(1), V, 0); + Node->getOperand(1), V, 0, false, false, 0); // Increment the pointer, VAList, to the next vararg SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0); + Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), V, 0, + false, false, 0); // Load the actual argument out of the pointer VAList - return DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0); + return 
DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, false, false, 0); } SDValue XCoreTargetLowering:: @@ -582,7 +606,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0); + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), SV, 0, + false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { @@ -877,7 +902,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, NULL, 0, + false, false, 0)); } } @@ -908,7 +934,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, RegInfo.addLiveIn(ArgRegs[i], VReg); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); // Move argument from virt reg -> stack - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, + false, false, 0); MemOps.push_back(Store); } if (!MemOps.empty()) @@ -1134,10 +1161,8 @@ static inline bool isImmUs4(int64_t val) bool XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { - // Be conservative with void - // FIXME: Can we be more aggressive? if (Ty->getTypeID() == Type::VoidTyID) - return false; + return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); const TargetData *TD = TM.getTargetData(); unsigned Size = TD->getTypeAllocSize(Ty); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index f7b620e..0c638af 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -28,7 +28,7 @@ namespace llvm { namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END+XCore::INSTRUCTION_LIST_END, + FIRST_NUMBER = ISD::BUILTIN_OP_END, // Branch and link (call) BL, @@ -52,7 +52,13 @@ namespace llvm { LADD, // Corresponds to LSUB instruction - LSUB + LSUB, + + // Jumptable branch. + BR_JT, + + // Jumptable branch using long branches for each entry. + BR_JT32 }; } @@ -122,7 +128,7 @@ namespace llvm { SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 5a54844..722e747 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -145,6 +145,11 @@ static inline bool IsCondBranch(unsigned BrOpc) { return IsBRF(BrOpc) || IsBRT(BrOpc); } +static inline bool IsBR_JT(unsigned BrOpc) { + return BrOpc == XCore::BR_JT + || BrOpc == XCore::BR_JT32; +} + /// GetCondFromBranchOpc - Return the XCore CC that matches /// the correspondent Branch instruction opcode. 
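Backing up to the LowerLOAD changes above: a misaligned word load whose address is known to be a word-aligned base plus a constant offset is rebuilt from the two aligned words that straddle it ("ldw low" and "ldw high"), combined with a right shift, a left shift and an or. A plain C++ restatement of that reassembly, assuming the little-endian layout XCore uses; the buffer and helper names are illustrative only, not part of the patch:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Rebuild an unaligned 32-bit load at 'addr' from the two aligned words
    // around it, shifting by the byte offset within the low word.
    static uint32_t misalignedLoad(const uint8_t *base, uint32_t addr) {
      uint32_t offset = addr & 3;            // byte offset within the word
      uint32_t lowAddr = addr & ~3u;
      uint32_t low, high;
      memcpy(&low,  base + lowAddr,     4);  // aligned "ldw low"
      memcpy(&high, base + lowAddr + 4, 4);  // aligned "ldw high"
      if (offset == 0) return low;           // already aligned, no shifting
      return (low >> (offset * 8)) | (high << (32 - offset * 8));
    }

    int main() {
      uint8_t buf[16];
      for (int i = 0; i < 16; ++i) buf[i] = (uint8_t)i;
      uint32_t expected;
      memcpy(&expected, buf + 3, 4);         // what a direct unaligned load reads
      printf("%08x %08x\n", (unsigned)misalignedLoad(buf, 3), (unsigned)expected);
      return 0;
    }

Both prints show the same word, which is the invariant the two-load lowering relies on.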
static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc) @@ -271,6 +276,14 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, return false; } + // Likewise if it ends with a branch table followed by an unconditional branch. + if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + // Otherwise, can't handle this. return true; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 10dc18c..46805d5 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -34,6 +34,15 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; +def SDT_XCoreBR_JT : SDTypeProfile<0, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT, + [SDNPHasChain]>; + +def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT, + [SDNPHasChain]>; + def SDT_XCoreAddress : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -185,6 +194,15 @@ def MEMii : Operand<i32> { let MIOperandInfo = (ops i32imm, i32imm); } +// Jump tables. +def InlineJT : Operand<i32> { + let PrintMethod = "printInlineJT"; +} + +def InlineJT32 : Operand<i32> { + let PrintMethod = "printInlineJT32"; +} + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// @@ -624,7 +642,7 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; // TODO extdp, kentsp, krestsp, blat, setsr // clrsr, getsr, kalli -let isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { def BRBU_u6 : _FU6< (outs), (ins brtarget:$target), @@ -756,24 +774,34 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), // One operand short // TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp -// bru, setdp, setcp, setv, setev, kcall +// setdp, setcp, setv, setev, kcall // dgetreg -let isBranch=1, isIndirectBranch=1, isTerminator=1 in +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), "bau $addr", [(brind GRRegs:$addr)]>; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in +def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i), + "bru $i\n$t", + [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>; + +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in +def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i), + "bru $i\n$t", + [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>; + let Defs=[SP], neverHasSideEffects=1 in def SETSP_1r : _F1R<(outs), (ins GRRegs:$src), "set sp, $src", []>; -let isBarrier = 1, hasCtrlDep = 1 in +let hasCtrlDep = 1 in def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src), "ecallt $src", []>; -let isBarrier = 1, hasCtrlDep = 1 in +let hasCtrlDep = 1 in def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src), "ecallf $src", []>; diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h index 7efb990..7424c78 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.h +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -10,13 +10,12 @@ #ifndef LLVM_TARGET_XCORE_TARGETOBJECTFILE_H #define LLVM_TARGET_XCORE_TARGETOBJECTFILE_H -#include "llvm/Target/TargetLoweringObjectFile.h" +#include 
"llvm/CodeGen/TargetLoweringObjectFileImpl.h" namespace llvm { class XCoreTargetObjectFile : public TargetLoweringObjectFileELF { public: - void Initialize(MCContext &Ctx, const TargetMachine &TM); // TODO: Classify globals as xcore wishes. diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index eac4e17..37d7a00 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -15,7 +15,6 @@ #define DEBUG_TYPE "hello" #include "llvm/Pass.h" #include "llvm/Function.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" using namespace llvm; diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 325d353..7cb1367 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -124,7 +124,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) - if (isa<PointerType>(I->getType())) + if (I->getType()->isPointerTy()) PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo)); if (PointerArgs.empty()) return 0; @@ -317,7 +317,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if(isByVal || AllCalleesPassInValidPointerForArgument(Arg)) + if (isByVal || AllCalleesPassInValidPointerForArgument(Arg)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -673,7 +673,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. - const Type *IdxTy = (isa<StructType>(ElTy) ? + const Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 4972687..3c05f88 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -19,10 +19,11 @@ #define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" +#include "llvm/DerivedTypes.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include <map> using namespace llvm; STATISTIC(NumMerged, "Number of global constants merged"); @@ -48,10 +49,10 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } bool ConstantMerge::runOnModule(Module &M) { // Map unique constant/section pairs to globals. We don't want to merge // globals in different sections. - std::map<std::pair<Constant*, std::string>, GlobalVariable*> CMap; + DenseMap<Constant*, GlobalVariable*> CMap; // Replacements - This vector contains a list of replacements to perform. - std::vector<std::pair<GlobalVariable*, GlobalVariable*> > Replacements; + SmallVector<std::pair<GlobalVariable*, GlobalVariable*>, 32> Replacements; bool MadeChange = false; @@ -76,19 +77,21 @@ bool ConstantMerge::runOnModule(Module &M) { continue; } - // Only process constants with initializers. - if (GV->isConstant() && GV->hasDefinitiveInitializer()) { - Constant *Init = GV->getInitializer(); - - // Check to see if the initializer is already known. 
- GlobalVariable *&Slot = CMap[std::make_pair(Init, GV->getSection())]; - - if (Slot == 0) { // Nope, add it to the map. - Slot = GV; - } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! - // Make all uses of the duplicate constant use the canonical version. - Replacements.push_back(std::make_pair(GV, Slot)); - } + // Only process constants with initializers in the default addres space. + if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() || + GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty()) + continue; + + Constant *Init = GV->getInitializer(); + + // Check to see if the initializer is already known. + GlobalVariable *&Slot = CMap[Init]; + + if (Slot == 0) { // Nope, add it to the map. + Slot = GV; + } else if (GV->hasLocalLinkage()) { // Yup, this is a duplicate! + // Make all uses of the duplicate constant use the canonical version. + Replacements.push_back(std::make_pair(GV, Slot)); } } @@ -100,11 +103,11 @@ bool ConstantMerge::runOnModule(Module &M) { // now. This avoid invalidating the pointers in CMap, which are unneeded // now. for (unsigned i = 0, e = Replacements.size(); i != e; ++i) { - // Eliminate any uses of the dead global... + // Eliminate any uses of the dead global. Replacements[i].first->replaceAllUsesWith(Replacements[i].second); - // Delete the global value from the module... - M.getGlobalList().erase(Replacements[i].first); + // Delete the global value from the module. + Replacements[i].first->eraseFromParent(); } NumMerged += Replacements.size(); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 1749b1e..f386ed7 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -796,7 +796,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Replace by null for now. Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); } else { - assert(isa<StructType>(RetTy) && + assert(RetTy->isStructTy() && "Return type changed, but not into a void. The old return type" " must have been a struct!"); Instruction *InsertPt = Call; @@ -870,7 +870,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) { RetVal = 0; } else { - assert (isa<StructType>(RetTy)); + assert (RetTy->isStructTy()); // The original return value was a struct, insert // extractvalue/insertvalue chains to extract only the values we need // to return and insert them into our new result. diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp index 025d77e..662fbb5 100644 --- a/lib/Transforms/IPO/DeadTypeElimination.cpp +++ b/lib/Transforms/IPO/DeadTypeElimination.cpp @@ -57,13 +57,13 @@ ModulePass *llvm::createDeadTypeEliminationPass() { // static inline bool ShouldNukeSymtabEntry(const Type *Ty){ // Nuke all names for primitive types! - if (Ty->isPrimitiveType() || Ty->isInteger()) + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) return true; // Nuke all pointers to primitive types as well... 
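For context on the rewritten ConstantMerge loop above: it now keys the map directly on the initializer (having already skipped non-constants, non-default address spaces and globals carrying a section), queues duplicates, and only afterwards rewrites uses and erases the dead globals so the map's pointers stay valid. The same two-pass pattern in a small standalone form, with plain containers standing in for the LLVM types:

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    struct Global { std::string name; std::string init; bool localLinkage; };

    int main() {
      std::vector<Global> globals = {
        {".str",   "hello", true},
        {".str.1", "hello", true},   // duplicate initializer, local linkage
        {".str.2", "world", true},
      };

      // First pass: remember the first global seen for each initializer and
      // queue duplicates for replacement (mirrors CMap / Replacements).
      std::unordered_map<std::string, const Global*> canonical;
      std::vector<std::pair<const Global*, const Global*>> replacements;
      for (const Global &g : globals) {
        auto it = canonical.find(g.init);
        if (it == canonical.end())
          canonical.emplace(g.init, &g);
        else if (g.localLinkage)
          replacements.emplace_back(&g, it->second);
      }

      // Second pass: rewrite uses and drop the duplicates
      // (replaceAllUsesWith + eraseFromParent in the real pass).
      for (auto &r : replacements)
        printf("replace %s with %s\n", r.first->name.c_str(), r.second->name.c_str());
      return 0;
    }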
if (const PointerType *PT = dyn_cast<PointerType>(Ty)) if (PT->getElementType()->isPrimitiveType() || - PT->getElementType()->isInteger()) + PT->getElementType()->isIntegerTy()) return true; return false; diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 64a6d78..298d5cf 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -175,7 +175,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI) { Value *Arg = *CI; - if (isa<PointerType>(Arg->getType()) && !PointsToLocalMemory(Arg)) + if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg)) // Writes memory. Just give up. return false; } @@ -257,7 +257,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) { continue; for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A) - if (isa<PointerType>(A->getType()) && !A->hasNoCaptureAttr() && + if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() && !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) { A->addAttr(Attribute::NoCapture); ++NumNoCapture; @@ -362,7 +362,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { // We annotate noalias return values, which are only applicable to // pointer types. - if (!isa<PointerType>(F->getReturnType())) + if (!F->getReturnType()->isPointerTy()) continue; if (!IsFunctionMallocLike(F, SCCNodes)) @@ -372,7 +372,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { bool MadeChange = false; for (unsigned i = 0, e = SCC.size(); i != e; ++i) { Function *F = SCC[i]->getFunction(); - if (F->doesNotAlias(0) || !isa<PointerType>(F->getReturnType())) + if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) continue; F->setDoesNotAlias(0); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ac91631..7b1e9c0 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -303,7 +303,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit); } else if (CE->getOpcode() == Instruction::BitCast && - isa<PointerType>(CE->getType())) { + CE->getType()->isPointerTy()) { // Pointer cast, delete any stores and memsets to the global. 
Changed |= CleanupConstantGlobalUsers(CE, 0); } @@ -431,7 +431,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) NumElements = SubVectorTy->getNumElements(); else { - assert(isa<StructType>(*GEPI) && + assert((*GEPI)->isStructTy() && "Indexed GEP type is not array, vector, or struct!"); continue; } @@ -543,7 +543,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { if (NewGlobals.empty()) return 0; - + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -642,7 +642,7 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V, return false; } else if (isa<ICmpInst>(*UI) && isa<ConstantPointerNull>(UI->getOperand(1))) { - // Ignore setcc X, null + // Ignore icmp X, null } else { //cerr << "NONTRAPPING USE: " << **UI; return false; @@ -813,57 +813,47 @@ static void ConstantPropUsersOf(Value *V) { static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, const Type *AllocTy, - Value* NElems, + ConstantInt *NElements, TargetData* TD) { - DEBUG(dbgs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - - const Type *IntPtrTy = TD->getIntPtrType(GV->getContext()); + DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n'); - // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have - // returned NULL and we would not be here). - BitCastInst *BCI = NULL; - for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; ) - if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)))) - break; - - ConstantInt *NElements = cast<ConstantInt>(NElems); - if (NElements->getZExtValue() != 1) { - // If we have an array allocation, transform it to a single element - // allocation to make the code below simpler. - Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue()); - unsigned TypeSize = TD->getTypeAllocSize(NewTy); - if (const StructType *ST = dyn_cast<StructType>(NewTy)) - TypeSize = TD->getStructLayout(ST)->getSizeInBytes(); - Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy, - ConstantInt::get(IntPtrTy, TypeSize)); - Value* Indices[2]; - Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); - Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2, - NewCI->getName()+".el0", CI); - Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI); - if (BCI) BCI->replaceAllUsesWith(NewGEP); - CI->replaceAllUsesWith(Cast); - if (BCI) BCI->eraseFromParent(); - CI->eraseFromParent(); - BCI = dyn_cast<BitCastInst>(NewCI); - CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI); - } + const Type *GlobalType; + if (NElements->getZExtValue() == 1) + GlobalType = AllocTy; + else + // If we have an array allocation, the global variable is of an array. + GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue()); // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. - const Type *MAT = getMallocAllocatedType(CI); - Constant *Init = UndefValue::get(MAT); GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - MAT, false, - GlobalValue::InternalLinkage, Init, + GlobalType, false, + GlobalValue::InternalLinkage, + UndefValue::get(GlobalType), GV->getName()+".body", GV, GV->isThreadLocal()); - // Anything that used the malloc or its bitcast now uses the global directly. 
- if (BCI) BCI->replaceAllUsesWith(NewGV); - CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI)); - + // If there are bitcast users of the malloc (which is typical, usually we have + // a malloc + bitcast) then replace them with uses of the new global. Update + // other users to use the global as well. + BitCastInst *TheBC = 0; + while (!CI->use_empty()) { + Instruction *User = cast<Instruction>(CI->use_back()); + if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { + if (BCI->getType() == NewGV->getType()) { + BCI->replaceAllUsesWith(NewGV); + BCI->eraseFromParent(); + } else { + BCI->setOperand(0, NewGV); + } + } else { + if (TheBC == 0) + TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); + User->replaceUsesOfWith(CI, TheBC); + } + } + Constant *RepValue = NewGV; if (NewGV->getType() != GV->getType()->getElementType()) RepValue = ConstantExpr::getBitCast(RepValue, @@ -879,60 +869,60 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, bool InitBoolUsed = false; // Loop over all uses of GV, processing them in turn. - std::vector<StoreInst*> Stores; - while (!GV->use_empty()) - if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { - while (!LI->use_empty()) { - Use &LoadUse = LI->use_begin().getUse(); - if (!isa<ICmpInst>(LoadUse.getUser())) - LoadUse = RepValue; - else { - ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); - // Replace the cmp X, 0 with a use of the bool value. - Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); - InitBoolUsed = true; - switch (ICI->getPredicate()) { - default: llvm_unreachable("Unknown ICmp Predicate!"); - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: // X < null -> always false - LV = ConstantInt::getFalse(GV->getContext()); - break; - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_EQ: - LV = BinaryOperator::CreateNot(LV, "notinit", ICI); - break; - case ICmpInst::ICMP_NE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - break; // no change. - } - ICI->replaceAllUsesWith(LV); - ICI->eraseFromParent(); - } - } - LI->eraseFromParent(); - } else { - StoreInst *SI = cast<StoreInst>(GV->use_back()); + while (!GV->use_empty()) { + if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) { // The global is initialized when the store to it occurs. new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI); SI->eraseFromParent(); + continue; } + + LoadInst *LI = cast<LoadInst>(GV->use_back()); + while (!LI->use_empty()) { + Use &LoadUse = LI->use_begin().getUse(); + if (!isa<ICmpInst>(LoadUse.getUser())) { + LoadUse = RepValue; + continue; + } + + ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); + // Replace the cmp X, 0 with a use of the bool value. + Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); + InitBoolUsed = true; + switch (ICI->getPredicate()) { + default: llvm_unreachable("Unknown ICmp Predicate!"); + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_SLT: // X < null -> always false + LV = ConstantInt::getFalse(GV->getContext()); + break; + case ICmpInst::ICMP_ULE: + case ICmpInst::ICMP_SLE: + case ICmpInst::ICMP_EQ: + LV = BinaryOperator::CreateNot(LV, "notinit", ICI); + break; + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_SGT: + break; // no change. 
+ } + ICI->replaceAllUsesWith(LV); + ICI->eraseFromParent(); + } + LI->eraseFromParent(); + } // If the initialization boolean was used, insert it, otherwise delete it. if (!InitBoolUsed) { while (!InitBool->use_empty()) // Delete initializations - cast<Instruction>(InitBool->use_back())->eraseFromParent(); + cast<StoreInst>(InitBool->use_back())->eraseFromParent(); delete InitBool; } else GV->getParent()->getGlobalList().insert(GV, InitBool); - - // Now the GV is dead, nuke it and the malloc (both CI and BCI). + // Now the GV is dead, nuke it and the malloc.. GV->eraseFromParent(); - if (BCI) BCI->eraseFromParent(); CI->eraseFromParent(); // To further other optimizations, loop over all users of NewGV and try to @@ -1303,9 +1293,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, ConstantInt::get(IntPtrTy, TypeSize), NElems, CI->getName() + ".f" + Twine(FieldNo)); - CallInst *NCI = dyn_cast<BitCastInst>(NMI) ? - extractMallocCallFromBitCast(NMI) : cast<CallInst>(NMI); - FieldMallocs.push_back(NCI); + FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); } @@ -1497,7 +1485,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // something. if (TD && NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD); + GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD); return true; } @@ -1556,7 +1544,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, // only has one (non-null) value stored into it, then we can optimize any // users of the loaded value (often calls and loads) that would trap if the // value was null. - if (isa<PointerType>(GV->getInitializer()->getType()) && + if (GV->getInitializer()->getType()->isPointerTy() && GV->getInitializer()->isNullValue()) { if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) { if (GV->getInitializer()->getType() != SOVC->getType()) @@ -1590,8 +1578,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // simplification. In these cases, we typically end up with "cond ? v1 : v2" // where v1 and v2 both require constant pool loads, a big loss. if (GVElType == Type::getInt1Ty(GV->getContext()) || - GVElType->isFloatingPoint() || - isa<PointerType>(GVElType) || isa<VectorType>(GVElType)) + GVElType->isFloatingPointTy() || + GVElType->isPointerTy() || GVElType->isVectorTy()) return false; // Walk the use list of the global seeing if all the uses are load or store. 
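The predicate switch just above, in OptimizeGlobalAddressOfMalloc, rewrites comparisons of the loaded global against null into uses of the new init boolean: the pointer is null exactly when the flag is still false. A plain C++ restatement of that mapping, with illustrative names rather than the LLVM ICmpInst machinery:

    #include <cstdio>

    enum Pred { ULT, SLT, ULE, SLE, EQ, NE, UGE, SGE, UGT, SGT };

    // Given that the global holds either null (not yet initialized) or the
    // malloc'd pointer (initialized), comparing the loaded value against null
    // collapses to a function of the init flag alone.
    static bool foldCmpAgainstNull(Pred p, bool initialized) {
      switch (p) {
      case ULT: case SLT:          return false;         // x <  null: never true
      case ULE: case SLE: case EQ: return !initialized;  // x <= null, x == null
      default:                     return initialized;   // x != null, x >= null, x > null
      }
    }

    int main() {
      printf("%d %d\n", foldCmpAgainstNull(EQ, false), foldCmpAgainstNull(EQ, true)); // 1 0
      printf("%d %d\n", foldCmpAgainstNull(NE, false), foldCmpAgainstNull(NE, true)); // 0 1
      return 0;
    }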
@@ -1925,7 +1913,7 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { if (!ATy) return 0; const StructType *STy = dyn_cast<StructType>(ATy->getElementType()); if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isInteger(32)) return 0; + !STy->getElementType(0)->isIntegerTy(32)) return 0; const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1)); if (!PFTy) return 0; const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); @@ -2148,7 +2136,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, Elts[CI->getZExtValue()] = EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); - if (isa<ArrayType>(Init->getType())) + if (Init->getType()->isArrayTy()) return ConstantArray::get(cast<ArrayType>(InitTy), Elts); else return ConstantVector::get(&Elts[0], Elts.size()); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 97e2f06..752a97c 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -41,12 +41,9 @@ static cl::opt<int> InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); -static cl::opt<bool> -RespectHint("respect-inlinehint", cl::Hidden, - cl::desc("Respect the inlinehint attribute")); - -// Threshold to use when inlinehint is given. -const int HintThreshold = 300; +static cl::opt<int> +HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325), + cl::desc("Threshold for inlining functions with inline hint")); // Threshold to use when optsize is specified (and there is no -inline-limit). const int OptSizeThreshold = 75; @@ -183,20 +180,22 @@ static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, } unsigned Inliner::getInlineThreshold(CallSite CS) const { - // Listen to inlinehint when -respect-inlinehint is given. - Function *Callee = CS.getCalledFunction(); - if (RespectHint && Callee && !Callee->isDeclaration() && - Callee->hasFnAttr(Attribute::InlineHint)) - return HintThreshold; + int thres = InlineThreshold; // Listen to optsize when -inline-limit is not given. Function *Caller = CS.getCaller(); if (Caller && !Caller->isDeclaration() && Caller->hasFnAttr(Attribute::OptimizeForSize) && InlineLimit.getNumOccurrences() == 0) - return OptSizeThreshold; + thres = OptSizeThreshold; + + // Listen to inlinehint when it would increase the threshold. 
+ Function *Callee = CS.getCalledFunction(); + if (HintThreshold > thres && Callee && !Callee->isDeclaration() && + Callee->hasFnAttr(Attribute::InlineHint)) + thres = HintThreshold; - return InlineThreshold; + return thres; } /// shouldInline - Return true if the inliner should attempt to inline diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 0e0d83a..310e4a2 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -214,6 +214,15 @@ static bool StripDebugInfo(Module &M) { Changed = true; } + if (Function *DbgVal = M.getFunction("llvm.dbg.value")) { + while (!DbgVal->use_empty()) { + CallInst *CI = cast<CallInst>(DbgVal->use_back()); + CI->eraseFromParent(); + } + DbgVal->eraseFromParent(); + Changed = true; + } + NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); if (NMD) { Changed = true; diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 5367900..bd06499 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -117,11 +117,11 @@ public: Instruction *visitUDiv(BinaryOperator &I); Instruction *visitSDiv(BinaryOperator &I); Instruction *visitFDiv(BinaryOperator &I); - Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Value *FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Instruction *visitAnd(BinaryOperator &I); - Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); - Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS); + Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS); + Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C); Instruction *visitOr (BinaryOperator &I); @@ -199,13 +199,15 @@ private: SmallVectorImpl<Value*> &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); - /// ValueRequiresCast - Return true if the cast from "V to Ty" actually - /// results in any code being generated. It does not require codegen if V is - /// simple enough or if the cast can be folded into other casts. - bool ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty); + /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually + /// results in any code being generated and is interesting to optimize out. If + /// the cast can be eliminated by some other simple transformation, we prefer + /// to do the simplification first. 
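The new getInlineThreshold logic above starts from -inline-threshold, drops to the optsize threshold when the caller is compiled for size and no explicit -inline-limit was given, and then lets -inlinehint-threshold raise the result for callees carrying the inlinehint attribute. A minimal sketch of that selection with the flag plumbing stubbed out; the constants mirror the defaults in the patch, but the functions here are illustrative, not the pass API:

    #include <cstdio>

    static const int InlineThreshold  = 225;  // -inline-threshold default
    static const int OptSizeThreshold = 75;   // used for optsize callers
    static const int HintThreshold    = 325;  // -inlinehint-threshold default

    static int getInlineThreshold(bool callerOptSize, bool inlineLimitGiven,
                                  bool calleeInlineHint) {
      int thres = InlineThreshold;
      if (callerOptSize && !inlineLimitGiven)
        thres = OptSizeThreshold;
      if (calleeInlineHint && HintThreshold > thres)  // only ever raises it
        thres = HintThreshold;
      return thres;
    }

    int main() {
      printf("%d\n", getInlineThreshold(false, false, false)); // 225
      printf("%d\n", getInlineThreshold(true,  false, false)); // 75
      printf("%d\n", getInlineThreshold(true,  false, true));  // 325: hint beats optsize
      return 0;
    }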
+ bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + const Type *Ty); Instruction *visitCallSite(CallSite CS); + Instruction *tryOptimizeCall(CallInst *CI, const TargetData *TD); bool transformConstExprCastCall(CallSite CS); Instruction *transformCallThroughTrampoline(CallSite CS); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, @@ -326,8 +328,8 @@ private: Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, bool isSub, Instruction &I); - Instruction *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside, Instruction &IB); + Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c2924ab..4d2c89e 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -35,7 +35,7 @@ static Constant *SubOne(ConstantInt *C) { // Otherwise, return null. // static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { - if (!V->hasOneUse() || !V->getType()->isInteger()) + if (!V->hasOneUse() || !V->getType()->isIntegerTy()) return 0; Instruction *I = dyn_cast<Instruction>(V); @@ -145,10 +145,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(LHS, RHS); - if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { // X + X --> X << 1 if (LHS == RHS) return BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); @@ -168,7 +168,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // -A + B --> B - A // -A + -B --> -(A + B) if (Value *LHSV = dyn_castNegVal(LHS)) { - if (LHS->getType()->isIntOrIntVector()) { + if (LHS->getType()->isIntOrIntVectorTy()) { if (Value *RHSV = dyn_castNegVal(RHS)) { Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); return BinaryOperator::CreateNeg(NewAdd); @@ -222,7 +222,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } // W*X + Y*Z --> W * (X+Z) iff W == Y - if (I.getType()->isIntOrIntVector()) { + if (I.getType()->isIntOrIntVectorTy()) { Value *W, *X, *Y, *Z; if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { @@ -373,10 +373,10 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) return ReplaceInstUsesWith(I, LHS); - // Check for (add double (sitofp x), y), see if we can merge this into an + // Check for (fadd double (sitofp x), y), see if we can merge this into an // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { - // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) + // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) // ... if the constant fits in the integer value. 
This is useful for things // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer // requires a constant pool load, and generally allows the add to be better @@ -394,7 +394,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { } } - // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) + // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { // Only do this if x/y have the same type, if at last one of them has a // single use (so we don't increase the number of int->fp conversions), @@ -560,7 +560,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Op0); // undef - X -> undef if (isa<UndefValue>(Op1)) return ReplaceInstUsesWith(I, Op1); // X - undef -> undef - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateXor(Op0, Op1); if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 28fd70e..3fb3de7 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -137,80 +137,44 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) { /// opcode and two operands into either a constant true or false, or a brand /// new ICmp instruction. The sign is passed in to determine which kind /// of predicate to use in the new icmp instruction. -static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS) { +static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + CmpInst::Predicate Pred; switch (Code) { default: assert(0 && "Illegal ICmp code!"); - case 0: - return ConstantInt::getFalse(LHS->getContext()); - case 1: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS); - case 2: - return new ICmpInst(ICmpInst::ICMP_EQ, LHS, RHS); - case 3: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SGE, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_UGE, LHS, RHS); - case 4: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SLT, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_ULT, LHS, RHS); - case 5: - return new ICmpInst(ICmpInst::ICMP_NE, LHS, RHS); - case 6: - if (Sign) - return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS); - return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); - case 7: - return ConstantInt::getTrue(LHS->getContext()); + case 0: // False. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; + case 2: Pred = ICmpInst::ICMP_EQ; break; + case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; + case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; + case 5: Pred = ICmpInst::ICMP_NE; break; + case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; + case 7: // True. + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); } + return Builder->CreateICmp(Pred, LHS, RHS); } /// getFCmpValue - This is the complement of getFCmpCode, which turns an /// opcode and two operands into either a FCmp instruction. isordered is passed /// in to determine which kind of predicate to use in the new fcmp instruction. 
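The visitFAdd hunk above folds (fadd double (sitofp x), fpcst) into (sitofp (add int x, intcst)) when the floating-point constant is exactly an integer that fits, citing (double)(x & 1234) + 4.0 as the motivating example. That particular identity is easy to check directly over a range of inputs:

    #include <cstdint>
    #include <cstdio>

    // (double)(x & 1234) + 4.0  ==  (double)((x & 1234) + 4): the masked value
    // lies in [0, 1234] and 4.0 is exactly an integer, so the add can be done
    // in the integer domain before the conversion without changing the result.
    int main() {
      for (int32_t x = -5000; x <= 5000; ++x) {
        double a = (double)(x & 1234) + 4.0;
        double b = (double)((x & 1234) + 4);
        if (a != b) { printf("mismatch at %d\n", x); return 1; }
      }
      printf("fadd(sitofp x, 4.0) == sitofp(add x, 4) over the tested range\n");
      return 0;
    }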
static Value *getFCmpValue(bool isordered, unsigned code, - Value *LHS, Value *RHS) { + Value *LHS, Value *RHS, + InstCombiner::BuilderTy *Builder) { + CmpInst::Predicate Pred; switch (code) { - default: llvm_unreachable("Illegal FCmp code!"); - case 0: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNO, LHS, RHS); - case 1: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGT, LHS, RHS); - case 2: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OEQ, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UEQ, LHS, RHS); - case 3: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OGE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UGE, LHS, RHS); - case 4: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLT, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULT, LHS, RHS); - case 5: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_ONE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_UNE, LHS, RHS); - case 6: - if (isordered) - return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS); - else - return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); - case 7: return ConstantInt::getTrue(LHS->getContext()); + default: assert(0 && "Illegal FCmp code!"); + case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break; + case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break; + case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break; + case 3: Pred = isordered ? FCmpInst::FCMP_OGE : FCmpInst::FCMP_UGE; break; + case 4: Pred = isordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; break; + case 5: Pred = isordered ? FCmpInst::FCMP_ONE : FCmpInst::FCMP_UNE; break; + case 6: Pred = isordered ? FCmpInst::FCMP_OLE : FCmpInst::FCMP_ULE; break; + case 7: return ConstantInt::getTrue(LHS->getContext()); } + return Builder->CreateFCmp(Pred, LHS, RHS); } /// PredicatesFoldable - Return true if both predicates match sign or if at @@ -355,40 +319,39 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, /// (V-Lo) <u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates /// whether to treat the V, Lo and HI as signed or not. IB is the location to /// insert new instructions. -Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside, - Instruction &IB) { +Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, + bool isSigned, bool Inside) { assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ? ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() && "Lo is not <= Hi in range emission code!"); if (Inside) { if (Lo == Hi) // Trivially false. - return new ICmpInst(ICmpInst::ICMP_NE, V, V); + return ConstantInt::getFalse(V->getContext()); // V >= Min && V < Hi --> V < Hi if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT); - return new ICmpInst(pred, V, Hi); + return Builder->CreateICmp(pred, V, Hi); } // Emit V-Lo <u Hi-Lo Constant *NegLo = ConstantExpr::getNeg(Lo); Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi); - return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound); + return Builder->CreateICmpULT(Add, UpperBound); } if (Lo == Hi) // Trivially true. 
- return new ICmpInst(ICmpInst::ICMP_EQ, V, V); + return ConstantInt::getTrue(V->getContext()); // V < Min || V >= Hi -> V > Hi-1 Hi = SubOne(cast<ConstantInt>(Hi)); if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) { ICmpInst::Predicate pred = (isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); - return new ICmpInst(pred, V, Hi); + return Builder->CreateICmp(pred, V, Hi); } // Emit V-Lo >u Hi-1-Lo @@ -396,7 +359,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound); + return Builder->CreateICmpUGT(Add, LowerBound); } // isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with @@ -472,8 +435,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. -Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) @@ -486,11 +448,7 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) & getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. - return ReplaceInstUsesWith(I, RV); + return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -506,13 +464,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, if (LHSCC == ICmpInst::ICMP_ULT && LHSCst->getValue().isPowerOf2()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } } @@ -562,33 +520,32 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; } case ICmpInst::ICMP_NE: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_ULT: if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst); + return Builder->CreateICmpULT(Val, LHSCst); break; // (X != 13 & X u< 15) -> no change case ICmpInst::ICMP_SLT: if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13 - return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst); + return Builder->CreateICmpSLT(Val, LHSCst); break; // (X != 13 & X s< 15) -> no 
change case ICmpInst::ICMP_EQ: // (X != 13 & X == 15) -> X == 15 case ICmpInst::ICMP_UGT: // (X != 13 & X u> 15) -> X u> 15 case ICmpInst::ICMP_SGT: // (X != 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_NE: if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 Constant *AddCST = ConstantExpr::getNeg(LHSCst); Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); - return new ICmpInst(ICmpInst::ICMP_UGT, Add, - ConstantInt::get(Add->getType(), 1)); + return Builder->CreateICmpUGT(Add, ConstantInt::get(Add->getType(), 1)); } break; // (X != 13 & X != 15) -> no change } @@ -598,12 +555,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u< 13 & X == 15) -> false case ICmpInst::ICMP_UGT: // (X u< 13 & X u> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_SGT: // (X u< 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u< 13 & X != 15) -> X u< 13 case ICmpInst::ICMP_ULT: // (X u< 13 & X u< 15) -> X u< 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_SLT: // (X u< 13 & X s< 15) -> no change break; } @@ -613,12 +570,12 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 case ICmpInst::ICMP_SLT: // (X s< 13 & X s< 15) -> X < 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_ULT: // (X s< 13 & X u< 15) -> no change break; } @@ -628,16 +585,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_UGT: // (X u> 13 & X u> 15) -> X u> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_SGT: // (X u> 13 & X s> 15) -> no change break; case ICmpInst::ICMP_NE: if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14 - return new ICmpInst(LHSCC, Val, RHSCst); + return Builder->CreateICmp(LHSCC, Val, RHSCst); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, false, true, I); + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true); case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change break; } @@ -647,16 +603,15 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 & X == 15) -> X == 15 case ICmpInst::ICMP_SGT: // (X s> 13 & X s> 15) -> X s> 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_UGT: // (X s> 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14 - return new ICmpInst(LHSCC, Val, RHSCst); + return Builder->CreateICmp(LHSCC, Val, RHSCst); break; // (X s> 13 & X != 15) -> no change case 
ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 - return InsertRangeTest(Val, AddOne(LHSCst), - RHSCst, true, true, I); + return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true); case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change break; } @@ -666,9 +621,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, return 0; } -Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { - +/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of +/// instcombine, this returns a Value which should already be inserted into the +/// function. +Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) @@ -677,17 +633,15 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, // If either of the constants are nans, then the whole thing returns // false. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); + return ConstantInt::getFalse(LHS->getContext()); + return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp ord x,x" is "fcmp ord x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_ORD, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); return 0; } @@ -705,14 +659,13 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); - + return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); if (Op0CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, RHS); + return RHS; if (Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, LHS); + return LHS; bool Op0Ordered; bool Op1Ordered; @@ -727,14 +680,14 @@ Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, // uno && ueq -> uno && (uno || eq) -> ueq // ord && olt -> ord && (ord && lt) -> olt if (Op0Ordered == Op1Ordered) - return ReplaceInstUsesWith(I, RHS); + return RHS; // uno && oeq -> uno && (ord && eq) -> false // uno && ord -> false if (!Op0Ordered) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); // ord && ueq -> ord && (uno || eq) -> oeq - return cast<Instruction>(getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS)); + return getFCmpValue(true, Op1Pred, Op0LHS, Op0RHS, Builder); } } @@ -930,26 +883,47 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) - if (Instruction *Res = FoldAndOfICmps(I, LHS, RHS)) - return Res; - + if (Value *Res = FoldAndOfICmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + + // If and'ing two fcmp, try combine them into one. + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + + // fold (and (cast A), (cast B)) -> (cast (and A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) - if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) - if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ? - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && - // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) { + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ? + SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + // Only do this if the casts both really cause code to be generated. + if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateAnd(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is and(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. 
+ if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldAndOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is and(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldAndOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); } + } // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts. if (BinaryOperator *SI1 = dyn_cast<BinaryOperator>(Op1)) { @@ -965,13 +939,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } } - // If and'ing two fcmp, try combine them into one. - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1143,7 +1110,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, // If A is not a select of -1/0, this cannot match. Value *Cond = 0; if (!match(A, m_SExt(m_Value(Cond))) || - !Cond->getType()->isInteger(1)) + !Cond->getType()->isIntegerTy(1)) return 0; // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. @@ -1161,8 +1128,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, } /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. -Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, - ICmpInst *LHS, ICmpInst *RHS) { +Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B) @@ -1175,11 +1141,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) | getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. 
- return ReplaceInstUsesWith(I, RV); + return getICmpValue(isSigned, Code, Op0, Op1, Builder); } } @@ -1193,7 +1155,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } // From here on, we only handle: @@ -1245,7 +1207,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, Constant *AddCST = ConstantExpr::getNeg(LHSCst); Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst); - return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST); + return Builder->CreateICmpULT(Add, AddCST); } break; // (X == 13 | X == 15) -> no change case ICmpInst::ICMP_UGT: // (X == 13 | X u> 14) -> no change @@ -1254,7 +1216,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, case ICmpInst::ICMP_NE: // (X == 13 | X != 15) -> X != 15 case ICmpInst::ICMP_ULT: // (X == 13 | X u< 15) -> X u< 15 case ICmpInst::ICMP_SLT: // (X == 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; } break; case ICmpInst::ICMP_NE: @@ -1263,11 +1225,11 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, case ICmpInst::ICMP_EQ: // (X != 13 | X == 15) -> X != 13 case ICmpInst::ICMP_UGT: // (X != 13 | X u> 15) -> X != 13 case ICmpInst::ICMP_SGT: // (X != 13 | X s> 15) -> X != 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_NE: // (X != 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X != 13 | X u< 15) -> true case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); } break; case ICmpInst::ICMP_ULT: @@ -1279,14 +1241,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // If RHSCst is [us]MAXINT, it is always false. Not handling // this can cause overflow. if (RHSCst->isMaxValue(false)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - false, false, I); + return LHS; + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false); case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15 case ICmpInst::ICMP_ULT: // (X u< 13 | X u< 15) -> X u< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_SLT: // (X u< 13 | X s< 15) -> no change break; } @@ -1300,14 +1261,13 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, // If RHSCst is [us]MAXINT, it is always false. Not handling // this can cause overflow. 
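
The getICmpCode/getICmpValue calls just above describe an integer predicate by the set of outcomes (less, equal, greater) it accepts, so or'ing two comparisons of the same operands reduces to a bit operation on those sets. A self-contained sketch with a made-up 3-bit encoding (the real encoding lives in the InstCombine helpers and may differ):

#include <cassert>
#include <cstdint>

enum : unsigned { LT = 1, EQ = 2, GT = 4 };   // illustrative encoding only

// Evaluate the predicate whose accepted outcomes are listed in 'code'.
static bool cmpByCode(unsigned code, uint32_t a, uint32_t b) {
  unsigned outcome = a < b ? LT : (a == b ? EQ : GT);
  return (code & outcome) != 0;
}

int main() {
  // (a u< b) | (a == b) has code LT|EQ, which is exactly (a u<= b).
  for (uint32_t a = 0; a < 64; ++a)
    for (uint32_t b = 0; b < 64; ++b) {
      bool orOfTwo = cmpByCode(LT, a, b) || cmpByCode(EQ, a, b);
      assert(orOfTwo == cmpByCode(LT | EQ, a, b));
      assert(orOfTwo == (a <= b));
    }
  return 0;
}
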
if (RHSCst->isMaxValue(true)) - return ReplaceInstUsesWith(I, LHS); - return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), - true, false, I); + return LHS; + return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false); case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15 case ICmpInst::ICMP_SLT: // (X s< 13 | X s< 15) -> X s< 15 - return ReplaceInstUsesWith(I, RHS); + return RHS; case ICmpInst::ICMP_ULT: // (X s< 13 | X u< 15) -> no change break; } @@ -1317,12 +1277,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X u> 13 | X == 15) -> X u> 13 case ICmpInst::ICMP_UGT: // (X u> 13 | X u> 15) -> X u> 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_SGT: // (X u> 13 | X s> 15) -> no change break; case ICmpInst::ICMP_NE: // (X u> 13 | X != 15) -> true case ICmpInst::ICMP_ULT: // (X u> 13 | X u< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); case ICmpInst::ICMP_SLT: // (X u> 13 | X s< 15) -> no change break; } @@ -1332,12 +1292,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, default: llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_EQ: // (X s> 13 | X == 15) -> X > 13 case ICmpInst::ICMP_SGT: // (X s> 13 | X s> 15) -> X > 13 - return ReplaceInstUsesWith(I, LHS); + return LHS; case ICmpInst::ICMP_UGT: // (X s> 13 | X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s> 13 | X != 15) -> true case ICmpInst::ICMP_SLT: // (X s> 13 | X s< 15) -> true - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); case ICmpInst::ICMP_ULT: // (X s> 13 | X u< 15) -> no change break; } @@ -1346,8 +1306,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, return 0; } -Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, - FCmpInst *RHS) { +/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of +/// instcombine, this returns a Value which should already be inserted into the +/// function. +Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_UNO && RHS->getPredicate() == FCmpInst::FCMP_UNO && LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { @@ -1356,20 +1318,18 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, // If either of the constants are nans, then the whole thing returns // true. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::getTrue(LHS->getContext()); // Otherwise, no need to compare the two constants, compare the // rest. - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp uno x,x" is "fcmp uno x, 0". 
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return new FCmpInst(FCmpInst::FCMP_UNO, - LHS->getOperand(0), RHS->getOperand(0)); + return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); return 0; } @@ -1386,14 +1346,13 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). if (Op0CC == Op1CC) - return new FCmpInst((FCmpInst::Predicate)Op0CC, - Op0LHS, Op0RHS); + return Builder->CreateFCmp((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1); if (Op0CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, RHS); + return RHS; if (Op1CC == FCmpInst::FCMP_FALSE) - return ReplaceInstUsesWith(I, LHS); + return LHS; bool Op0Ordered; bool Op1Ordered; unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); @@ -1401,11 +1360,7 @@ Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, if (Op0Ordered == Op1Ordered) { // If both are ordered or unordered, return a new fcmp with // or'ed predicates. - Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value... - return ReplaceInstUsesWith(I, RV); + return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder); } } return 0; @@ -1446,8 +1401,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Value *V = SimplifyOrInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - - + // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. if (SimplifyDemandedInstructionBits(I)) @@ -1456,7 +1410,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { ConstantInt *C1 = 0; Value *X = 0; // (X & C1) | C2 --> (X | C2) & (C1|C2) + // iff (C1 & C2) == 0. if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && + (RHS->getValue() & C1->getValue()) != 0 && Op0->hasOneUse()) { Value *Or = Builder->CreateOr(X, RHS); Or->takeName(Op0); @@ -1479,6 +1435,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; + if (isa<PHINode>(Op0)) if (Instruction *NV = FoldOpIntoPhi(I)) return NV; @@ -1600,7 +1557,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants. // Don't do this for vector select idioms, the code generator doesn't handle // them well yet. 
- if (!isa<VectorType>(I.getType())) { + if (!I.getType()->isVectorTy()) { if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) return Match; if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) @@ -1666,40 +1623,50 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) - if (Instruction *Res = FoldOrOfICmps(I, LHS, RHS)) - return Res; + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) + if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return ReplaceInstUsesWith(I, Res); + // fold (or (cast A), (cast B)) -> (cast (or A, B)) if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) {// same cast kind ? - if (!isa<ICmpInst>(Op0C->getOperand(0)) || - !isa<ICmpInst>(Op1C->getOperand(0))) { - const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && - SrcTy->isIntOrIntVector() && + const Type *SrcTy = Op0C->getOperand(0)->getType(); + if (SrcTy == Op1C->getOperand(0)->getType() && + SrcTy->isIntOrIntVectorTy()) { + Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0); + + if ((!isa<ICmpInst>(Op0COp) || !isa<ICmpInst>(Op1COp)) && // Only do this if the casts both really cause code to be // generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { - Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), - Op1C->getOperand(0), I.getName()); + ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1COp, I.getType())) { + Value *NewOp = Builder->CreateOr(Op0COp, Op1COp, I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); } + + // If this is or(cast(icmp), cast(icmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1COp)) + if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0COp)) + if (Value *Res = FoldOrOfICmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); + + // If this is or(cast(fcmp), cast(fcmp)), try to fold this even if the + // cast is otherwise not optimizable. This happens for vector sexts. + if (FCmpInst *RHS = dyn_cast<FCmpInst>(Op1COp)) + if (FCmpInst *LHS = dyn_cast<FCmpInst>(Op0COp)) + if (Value *Res = FoldOrOfFCmps(LHS, RHS)) + return CastInst::Create(Op0C->getOpcode(), Res, I.getType()); } } } - - // (fcmp uno x, c) | (fcmp uno y, c) -> (fcmp uno x, y) - if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { - if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) - if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) - return Res; - } - return Changed ? &I : 0; } @@ -1723,7 +1690,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // purpose is to compute bits we don't care about. 
if (SimplifyDemandedInstructionBits(I)) return &I; - if (isa<VectorType>(I.getType())) + if (I.getType()->isVectorTy()) if (isa<ConstantAggregateZero>(Op1)) return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X @@ -1971,11 +1938,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1); unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS); bool isSigned = LHS->isSigned() || RHS->isSigned(); - Value *RV = getICmpValue(isSigned, Code, Op0, Op1); - if (Instruction *I = dyn_cast<Instruction>(RV)) - return I; - // Otherwise, it's a constant boolean value. - return ReplaceInstUsesWith(I, RV); + return ReplaceInstUsesWith(I, + getICmpValue(isSigned, Code, Op0, Op1, Builder)); } } @@ -1984,12 +1948,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind? const Type *SrcTy = Op0C->getOperand(0)->getType(); - if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && + if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isIntegerTy() && // Only do this if the casts both really cause code to be generated. - ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0), - I.getType()) && - ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0), - I.getType())) { + ShouldOptimizeCast(Op0C->getOpcode(), Op0C->getOperand(0), + I.getType()) && + ShouldOptimizeCast(Op1C->getOpcode(), Op1C->getOperand(0), + I.getType())) { Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), Op1C->getOperand(0), I.getName()); return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType()); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 4929f40..e2b7d3d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetData.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; /// getPromotedType - Return the specified type promoted as it would be to pass @@ -199,7 +200,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { // Extract the length and alignment and fill if they are constant. ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength()); ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); - if (!LenC || !FillC || !FillC->getType()->isInteger(8)) + if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return 0; uint64_t Len = LenC->getZExtValue(); Alignment = MI->getAlignment(); @@ -304,23 +305,77 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { + // We need target data for just about everything so depend on it. + if (!TD) break; + const Type *ReturnTy = CI.getType(); - Value *Op1 = II->getOperand(1); bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1); + + // Get to the real allocated thing and offset as fast as possible. + Value *Op1 = II->getOperand(1)->stripPointerCasts(); - if (!TD) break; - Op1 = Op1->stripPointerCasts(); - + // If we've stripped down to a single global variable that we + // can know the size of then just return that. 
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) { if (GV->hasDefinitiveInitializer()) { Constant *C = GV->getInitializer(); - size_t globalSize = TD->getTypeAllocSize(C->getType()); - return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, globalSize)); + uint64_t GlobalSize = TD->getTypeAllocSize(C->getType()); + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, GlobalSize)); } else { + // Can't determine size of the GV. Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); return ReplaceInstUsesWith(CI, RetVal); } - } + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { + // Get alloca size. + if (AI->getAllocatedType()->isSized()) { + uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); + if (AI->isArrayAllocation()) { + const ConstantInt *C = dyn_cast<ConstantInt>(AI->getArraySize()); + if (!C) break; + AllocaSize *= C->getZExtValue(); + } + return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, AllocaSize)); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op1)) { + // Only handle constant GEPs here. + if (CE->getOpcode() != Instruction::GetElementPtr) break; + GEPOperator *GEP = cast<GEPOperator>(CE); + + // Make sure we're not a constant offset from an external + // global. + Value *Operand = GEP->getPointerOperand(); + Operand = Operand->stripPointerCasts(); + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) + if (!GV->hasDefinitiveInitializer()) break; + + // Get what we're pointing to and its size. + const PointerType *BaseType = + cast<PointerType>(Operand->getType()); + uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType()); + + // Get the current byte offset into the thing. Use the original + // operand in case we're looking through a bitcast. + SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end()); + const PointerType *OffsetType = + cast<PointerType>(GEP->getPointerOperand()->getType()); + uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size()); + + if (Size < Offset) { + // Out of bound reference? Negative index normalized to large + // index? Just return "I don't know". + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + return ReplaceInstUsesWith(CI, RetVal); + } + + Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); + return ReplaceInstUsesWith(CI, RetVal); + + } + + // Do not return "I don't know" here. Later optimization passes could + // make it possible to evaluate objectsize to a constant. + break; } case Intrinsic::bswap: // bswap(bswap(x)) -> x @@ -686,6 +741,122 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, return true; } +// Try to fold some different type of calls here. +// Currently we're only working with the checking functions, memcpy_chk, +// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, +// strcat_chk and strncat_chk. +Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) { + if (CI->getCalledFunction() == 0) return 0; + + StringRef Name = CI->getCalledFunction()->getName(); + BasicBlock *BB = CI->getParent(); + IRBuilder<> B(CI->getParent()->getContext()); + + // Set the builder to the instruction after the call. 
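
The objectsize handling added above reduces to simple arithmetic once the base allocation and a constant offset are known: report the bytes remaining past the offset, and fall back to 0 (minimum variant) or -1 (maximum variant) when the answer is unknown or out of bounds. A hedged plain-C++ model of that arithmetic (foldObjectSize is an illustrative name, not code from the patch):

#include <cassert>
#include <cstdint>

// Bytes llvm.objectsize could report for a pointer 'offset' bytes into an
// object of 'allocSize' bytes; 'min' selects the 0 / -1 fallback.
static uint64_t foldObjectSize(uint64_t allocSize, uint64_t offset, bool min) {
  if (offset > allocSize)              // out of bounds, or nothing is known
    return min ? 0 : ~0ULL;
  return allocSize - offset;
}

int main() {
  // e.g. a 32-byte global or alloca
  assert(foldObjectSize(32, 8,  /*min=*/true)  == 24);
  assert(foldObjectSize(32, 40, /*min=*/true)  == 0);
  assert(foldObjectSize(32, 40, /*min=*/false) == ~0ULL);
  return 0;
}
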
+ B.SetInsertPoint(BB, CI); + + if (Name == "__memcpy_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!SizeCI) + return 0; + ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeArg) + return 0; + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() <= SizeArg->getZExtValue()) { + EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + 1, B, TD); + return ReplaceInstUsesWith(*CI, CI->getOperand(1)); + } + return 0; + } + + // Should be similar to memcpy. + if (Name == "__mempcpy_chk") { + return 0; + } + + if (Name == "__memmove_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!SizeCI) + return 0; + ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeArg) + return 0; + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() <= SizeArg->getZExtValue()) { + EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + 1, B, TD); + return ReplaceInstUsesWith(*CI, CI->getOperand(1)); + } + return 0; + } + + if (Name == "__memset_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!SizeCI) + return 0; + ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeArg) + return 0; + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() <= SizeArg->getZExtValue()) { + Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), + false); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); + return ReplaceInstUsesWith(*CI, CI->getOperand(1)); + } + return 0; + } + + if (Name == "__strcpy_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeCI) + return 0; + // If a) we don't have any length information, or b) we know this will + // fit then just lower to a plain strcpy. Otherwise we'll keep our + // strcpy_chk call which may fail at runtime if the size is too long. + // TODO: It might be nice to get a maximum length out of the possible + // string lengths for varying. + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) { + Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD); + return ReplaceInstUsesWith(*CI, Ret); + } + return 0; + } + + // Should be similar to strcpy. + if (Name == "__stpcpy_chk") { + return 0; + } + + if (Name == "__strncpy_chk") { + ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); + if (!SizeCI) + return 0; + ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3)); + if (!SizeArg) + return 0; + if (SizeCI->isAllOnesValue() || + SizeCI->getZExtValue() <= SizeArg->getZExtValue()) { + Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD); + return ReplaceInstUsesWith(*CI, Ret); + } + return 0; + } + + if (Name == "__strcat_chk") { + return 0; + } + + if (Name == "__strncat_chk") { + return 0; + } + + return 0; +} + // visitCallSite - Improvements for call and invoke instructions. // Instruction *InstCombiner::visitCallSite(CallSite CS) { @@ -772,6 +943,16 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { Changed = true; } + // Try to optimize the call if possible, we require TargetData for most of + // this. None of these calls are seen as possibly dead so go ahead and + // delete the instruction now. + if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { + Instruction *I = tryOptimizeCall(CI, TD); + // If we changed something return the result, etc. Otherwise let + // the fallthrough check. 
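
The __strcpy_chk branch above keeps the checked call unless the copy provably fits: the destination-size argument is either unknown (encoded as all ones) or at least the source length including the terminator. A small sketch of that decision in plain C++ (canLowerToStrcpy is an illustrative name, not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstring>

// True when the fortified call can be replaced by an ordinary strcpy.
static bool canLowerToStrcpy(uint64_t dstSize, uint64_t srcLenWithNul) {
  return dstSize == ~0ULL || dstSize >= srcLenWithNul;
}

int main() {
  const char *src = "hello";
  uint64_t need = std::strlen(src) + 1;          // 6 bytes including '\0'
  assert(canLowerToStrcpy(~0ULL, need));         // size unknown at compile time
  assert(canLowerToStrcpy(8, need));             // fits: plain strcpy is fine
  assert(!canLowerToStrcpy(4, need));            // may overflow: keep the check
  return 0;
}
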
+ if (I) return EraseInstFromFunction(*I); + } + return Changed ? CS.getInstruction() : 0; } @@ -796,7 +977,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { const Type *OldRetTy = Caller->getType(); const Type *NewRetTy = FT->getReturnType(); - if (isa<StructType>(NewRetTy)) + if (NewRetTy->isStructTy()) return false; // TODO: Handle multiple return values. // Check to see if we are changing the return type... @@ -804,9 +985,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Callee->isDeclaration() && // Conversion is ok if changing from one pointer type to another or from // a pointer to an integer of the same size. - !((isa<PointerType>(OldRetTy) || !TD || + !((OldRetTy->isPointerTy() || !TD || OldRetTy == TD->getIntPtrType(Caller->getContext())) && - (isa<PointerType>(NewRetTy) || !TD || + (NewRetTy->isPointerTy() || !TD || NewRetTy == TD->getIntPtrType(Caller->getContext())))) return false; // Cannot transform this return value. @@ -853,9 +1034,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Converting from one pointer type to another or between a pointer and an // integer of the same size is safe even if we do not have a body. bool isConvertible = ActTy == ParamTy || - (TD && ((isa<PointerType>(ParamTy) || + (TD && ((ParamTy->isPointerTy() || ParamTy == TD->getIntPtrType(Caller->getContext())) && - (isa<PointerType>(ActTy) || + (ActTy->isPointerTy() || ActTy == TD->getIntPtrType(Caller->getContext())))); if (Callee->isDeclaration() && !isConvertible) return false; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 09cd21f..a68fc6d 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -23,7 +23,7 @@ using namespace PatternMatch; /// static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, int &Offset) { - assert(Val->getType()->isInteger(32) && "Unexpected allocation size type!"); + assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!"); if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { Offset = CI->getZExtValue(); Scale = 0; @@ -255,17 +255,26 @@ isEliminableCastPair( return Instruction::CastOps(Res); } -/// ValueRequiresCast - Return true if the cast from "V to Ty" actually results -/// in any code being generated. It does not require codegen if V is simple -/// enough or if the cast can be folded into other casts. -bool InstCombiner::ValueRequiresCast(Instruction::CastOps opcode,const Value *V, - const Type *Ty) { +/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually +/// results in any code being generated and is interesting to optimize out. If +/// the cast can be eliminated by some other simple transformation, we prefer +/// to do the simplification first. +bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, + const Type *Ty) { + // Noop casts and casts of constants should be eliminated trivially. if (V->getType() == Ty || isa<Constant>(V)) return false; - // If this is another cast that can be eliminated, it isn't codegen either. + // If this is another cast that can be eliminated, we prefer to have it + // eliminated. 
if (const CastInst *CI = dyn_cast<CastInst>(V)) - if (isEliminableCastPair(CI, opcode, Ty, TD)) + if (isEliminableCastPair(CI, opc, Ty, TD)) return false; + + // If this is a vector sext from a compare, then we don't want to break the + // idiom where each element of the extended vector is either zero or all ones. + if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy()) + return false; + return true; } @@ -294,8 +303,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { if (isa<PHINode>(Src)) { // We don't do this if this would create a PHI node with an illegal type if // it is currently legal. - if (!isa<IntegerType>(Src->getType()) || - !isa<IntegerType>(CI.getType()) || + if (!Src->getType()->isIntegerTy() || + !CI.getType()->isIntegerTy() || ShouldChangeType(CI.getType(), Src->getType())) if (Instruction *NV = FoldOpIntoPhi(CI)) return NV; @@ -427,7 +436,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) && + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateTruncated(Src, DestTy)) { // If this cast is a truncate, evaluting in a different type always @@ -719,7 +728,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // expression tree to something weird like i93 unless the source is also // strange. unsigned BitsToClear; - if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) && + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateZExtd(Src, DestTy, BitsToClear)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); @@ -828,7 +837,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1 Value *X; - if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isInteger(1) && + if (SrcI && SrcI->hasOneUse() && SrcI->getType()->isIntegerTy(1) && match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa<CmpInst>(X))) { Value *New = Builder->CreateZExt(X, CI.getType()); @@ -927,7 +936,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // type. Only do this if the dest type is a simple type, don't convert the // expression tree to something weird like i93 unless the source is also // strange. - if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) && + if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && CanEvaluateSExtd(Src, DestTy)) { // Okay, we can transform this! Insert the new expression now. 
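
The new ShouldOptimizeCast check above deliberately leaves a sign-extended compare alone when the result is a vector, because each extended lane is either zero or all ones, and the folds threaded through casts earlier in this diff (FoldAndOfICmps, FoldOrOfICmps and friends) depend on that mask form. A scalar model of the idiom in standalone C++:

#include <cassert>
#include <cstdint>

static int32_t cmpMask(bool c) { return c ? -1 : 0; }   // models sext i1 -> i32

int main() {
  for (int32_t a = -2; a <= 2; ++a)
    for (int32_t b = -2; b <= 2; ++b) {
      int32_t m1 = cmpMask(a < 0), m2 = cmpMask(b < 0);
      assert((m1 & m2) == cmpMask(a < 0 && b < 0));   // and of masks == mask of &&
      assert((m1 | m2) == cmpMask(a < 0 || b < 0));   // or  of masks == mask of ||
      int32_t sel = (a & m1) | (b & ~m1);             // branchless "m1 ? a : b"
      assert(sel == (a < 0 ? a : b));
    }
  return 0;
}
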
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" @@ -1280,7 +1289,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Constant::getNullValue(Type::getInt32Ty(CI.getContext())); unsigned NumZeros = 0; while (SrcElTy != DstElTy && - isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) && + isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() && SrcElTy->getNumContainedTypes() /* not "{}" */) { SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt); ++NumZeros; @@ -1295,7 +1304,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { - if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) { + if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); @@ -1304,7 +1313,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { - if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) { + if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) { Value *Elem = Builder->CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); @@ -1315,7 +1324,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitconvert to a vector with the same # elts. - if (SVI->hasOneUse() && isa<VectorType>(DestTy) && + if (SVI->hasOneUse() && DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == @@ -1337,7 +1346,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } - if (isa<PointerType>(SrcTy)) + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 7c00c2c..72fd558 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -877,25 +877,26 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(ICI.getContext())); - else if (HiOverflow) + if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); - else if (LoOverflow) + if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI); + return ReplaceInstUsesWith(ICI, + InsertRangeTest(X, LoBound, HiBound, DivIsSigned, + true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(ICI.getContext())); - else if (HiOverflow) + if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); - else if (LoOverflow) + if (LoOverflow) return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, HiBound); - else - return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI); + return ReplaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound, + DivIsSigned, false)); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. @@ -1606,7 +1607,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { const Type *Ty = Op0->getType(); // icmp's with boolean values can always be turned into bitwise operations - if (Ty->isInteger(1)) { + if (Ty->isIntegerTy(1)) { switch (I.getPredicate()) { default: llvm_unreachable("Invalid icmp instruction!"); case ICmpInst::ICMP_EQ: { // icmp eq i1 A, B -> ~(A^B) @@ -1650,7 +1651,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { unsigned BitWidth = 0; if (TD) BitWidth = TD->getTypeSizeInBits(Ty->getScalarType()); - else if (Ty->isIntOrIntVector()) + else if (Ty->isIntOrIntVectorTy()) BitWidth = Ty->getScalarSizeInBits(); bool isSignBit = false; @@ -1988,7 +1989,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // values. If the ptr->ptr cast can be stripped off both arguments, we do so // now. if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) { - if (isa<PointerType>(Op0->getType()) && + if (Op0->getType()->isPointerTy() && (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) { // We keep moving the cast from the left operand over to the right // operand, where it can often be eliminated completely. @@ -2458,17 +2459,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } - case Instruction::Load: - if (GetElementPtrInst *GEP = - dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { - if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer() && - !cast<LoadInst>(LHSI)->isVolatile()) - if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) - return Res; + case Instruction::Load: + if (GetElementPtrInst *GEP = + dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer() && + !cast<LoadInst>(LHSI)->isVolatile()) + if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I)) + return Res; + } + break; } - break; - } } return Changed ? &I : 0; diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 2d13298..0f2a24f 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -87,8 +87,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, const Type *SrcPTy = SrcTy->getElementType(); - if (DestPTy->isInteger() || isa<PointerType>(DestPTy) || - isa<VectorType>(DestPTy)) { + if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || + DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. 
@@ -104,11 +104,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } if (IC.getTargetData() && - (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) || - isa<VectorType>(SrcPTy)) && + (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || + SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. - (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) && + (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) && IC.getTargetData()->getTypeSizeInBits(SrcPTy) == IC.getTargetData()->getTypeSizeInBits(DestPTy)) { @@ -243,7 +243,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { const Type *SrcPTy = SrcTy->getElementType(); - if (!DestPTy->isInteger() && !isa<PointerType>(DestPTy)) + if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) return 0; /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" @@ -255,7 +255,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. - if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) { + if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { // Index through pointer. Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); NewGEPIndices.push_back(Zero); @@ -277,7 +277,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); } - if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) + if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) return 0; // If the pointers point into different address spaces or if they point to @@ -297,11 +297,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Instruction::CastOps opcode = Instruction::BitCast; const Type* CastSrcTy = SIOp0->getType(); const Type* CastDstTy = SrcPTy; - if (isa<PointerType>(CastDstTy)) { - if (CastSrcTy->isInteger()) + if (CastDstTy->isPointerTy()) { + if (CastSrcTy->isIntegerTy()) opcode = Instruction::IntToPtr; - } else if (isa<IntegerType>(CastDstTy)) { - if (isa<PointerType>(SIOp0->getType())) + } else if (CastDstTy->isIntegerTy()) { + if (SIOp0->getType()->isPointerTy()) opcode = Instruction::PtrToInt; } @@ -413,7 +413,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. if (isa<DbgInfoIntrinsic>(BBI) || - (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; } @@ -483,7 +483,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || - (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))); + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())); if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) @@ -544,7 +544,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { --BBI; // Skip over debugging info. 
while (isa<DbgInfoIntrinsic>(BBI) || - (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) { + (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; --BBI; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2e26a75..b3974e8 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -76,7 +76,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return BinaryOperator::CreateShl(Op0, ConstantInt::get(Op0->getType(), Val.logBase2())); } - } else if (isa<VectorType>(Op1C->getType())) { + } else if (Op1C->getType()->isVectorTy()) { if (Op1C->isNullValue()) return ReplaceInstUsesWith(I, Op1C); @@ -157,7 +157,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } /// i1 mul -> i1 and. - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y @@ -173,7 +173,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // If one of the operands of the multiply is a cast from a boolean value, then // we know the bool is either zero or one, so this is a 'masking' multiply. // X * Y (where Y is 0 or 1) -> X & (0-Y) - if (!isa<VectorType>(I.getType())) { + if (!I.getType()->isVectorTy()) { // -2 is "-1 << 1" so it is all bits set except the low one. APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); @@ -203,8 +203,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // "In IEEE floating point, x*1 is not equivalent to x for nans. However, // ANSI says we can drop signals, so we can do this anyway." (from GCC) if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' - } else if (isa<VectorType>(Op1C->getType())) { + return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0' + } else if (Op1C->getType()->isVectorTy()) { if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) { // As above, vector X*splat(1.0) -> X in all defined cases. if (Constant *Splat = Op1V->getSplatValue()) { @@ -314,7 +314,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) { // undef / X -> 0 for integer. // undef / X -> undef for FP (the undef could be a snan). if (isa<UndefValue>(Op0)) { - if (Op0->getType()->isFPOrFPVector()) + if (Op0->getType()->isFPOrFPVectorTy()) return ReplaceInstUsesWith(I, Op0); return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -386,7 +386,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // It can't be division by zero, hence it must be division by one. - if (I.getType()->isInteger(1)) + if (I.getType()->isIntegerTy(1)) return ReplaceInstUsesWith(I, Op0); if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { @@ -493,7 +493,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. 
- if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op0, Mask)) { if (MaskedValueIsZero(Op1, Mask)) { @@ -527,7 +527,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); if (isa<UndefValue>(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVector()) + if (I.getType()->isFPOrFPVectorTy()) return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); } @@ -648,7 +648,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. - if (I.getType()->isInteger()) { + if (I.getType()->isIntegerTy()) { APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index bb7632f..65f0393 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -266,6 +266,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // and if TD isn't around, we can't handle the mixed case. bool isVolatile = FirstLI->isVolatile(); unsigned LoadAlignment = FirstLI->getAlignment(); + unsigned LoadAddrSpace = FirstLI->getPointerAddressSpace(); // We can't sink the load if the loaded value could be modified between the // load and the PHI. @@ -290,6 +291,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // the load and the PHI. if (LI->isVolatile() != isVolatile || LI->getParent() != PN.getIncomingBlock(i) || + LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) return 0; @@ -371,7 +373,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // Be careful about transforming integer PHIs. We don't want to pessimize // the code by turning an i32 into an i1293. - if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) { + if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) { if (!ShouldChangeType(PN.getType(), CastSrcTy)) return 0; } @@ -832,7 +834,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // it is only used by trunc or trunc(lshr) operations. If so, we split the // PHI into the various pieces being extracted. This sort of thing is // introduced when SROA promotes an aggregate to a single large integer type. 
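
A quick check of the assumption behind the visitSDiv/visitSRem hunks a little above (signed and unsigned division agree once the sign bit of both operands is known to be clear), done in plain C++ over small nonnegative operands:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t a = 0; a < 200; ++a)
    for (int32_t b = 1; b < 200; ++b) {
      uint32_t ua = static_cast<uint32_t>(a), ub = static_cast<uint32_t>(b);
      assert(a / b == static_cast<int32_t>(ua / ub));   // sdiv == udiv here
      assert(a % b == static_cast<int32_t>(ua % ub));   // srem == urem here
    }
  return 0;
}
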
- if (isa<IntegerType>(PN.getType()) && TD && + if (PN.getType()->isIntegerTy() && TD && !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits())) if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 9a02b33..2fc9325 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -441,7 +441,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, FalseVal); } - if (SI.getType()->isInteger(1)) { + if (SI.getType()->isIntegerTy(1)) { if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) { if (C->getZExtValue()) { // Change: A = select B, true, C --> A = or B, C @@ -539,9 +539,18 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { !CFPf->getValueAPF().isZero())) return ReplaceInstUsesWith(SI, FalseVal); } - // Transform (X != Y) ? X : Y -> X - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) + // Transform (X une Y) ? X : Y -> X + if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && + !CFPf->getValueAPF().isZero())) return ReplaceInstUsesWith(SI, TrueVal); + } // NOTE: if we wanted to, this is where to detect MIN/MAX } else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){ @@ -557,9 +566,18 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { !CFPf->getValueAPF().isZero())) return ReplaceInstUsesWith(SI, FalseVal); } - // Transform (X != Y) ? Y : X -> Y - if (FCI->getPredicate() == FCmpInst::FCMP_ONE) - return ReplaceInstUsesWith(SI, TrueVal); + // Transform (X une Y) ? Y : X -> Y + if (FCI->getPredicate() == FCmpInst::FCMP_UNE) { + // This is not safe in general for floating point: + // consider X== -0, Y== +0. + // It becomes safe if either operand is a nonzero constant. + ConstantFP *CFPt, *CFPf; + if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) && + !CFPt->getValueAPF().isZero()) || + ((CFPf = dyn_cast<ConstantFP>(FalseVal)) && + !CFPf->getValueAPF().isZero())) + return ReplaceInstUsesWith(SI, TrueVal); + } // NOTE: if we wanted to, this is where to detect MIN/MAX } // NOTE: if we wanted to, this is where to detect ABS @@ -629,7 +647,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // See if we can fold the select into one of our operands. 
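
The comment added in the select hunk above ("consider X== -0, Y== +0") is easy to demonstrate: folding (x != y) ? x : y down to plain x would change the sign of zero, which is why the fold now also requires a nonzero constant operand. A standalone C++ check:

#include <cassert>
#include <cmath>

int main() {
  double x = -0.0, y = +0.0;
  double selected = (x != y) ? x : y;   // -0.0 != +0.0 is false, so y is chosen
  assert(!std::signbit(selected));      // result is +0.0
  assert(std::signbit(x));              // the "folded" answer x would be -0.0
  return 0;
}
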
- if (SI.getType()->isInteger()) { + if (SI.getType()->isIntegerTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 53a5684..cd41844 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -104,10 +104,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); const Type *VTy = V->getType(); - assert((TD || !isa<PointerType>(VTy)) && + assert((TD || !VTy->isPointerTy()) && "SimplifyDemandedBits needs to know bit widths!"); assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) && - (!VTy->isIntOrIntVector() || + (!VTy->isIntOrIntVectorTy() || VTy->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && @@ -401,7 +401,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; } case Instruction::BitCast: - if (!I->getOperand(0)->getType()->isIntOrIntVector()) + if (!I->getOperand(0)->getType()->isIntOrIntVectorTy()) return 0; // vector->int or fp->int? if (const VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) { @@ -413,7 +413,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } else // Don't touch a scalar-to-vector bitcast. return 0; - } else if (isa<VectorType>(I->getOperand(0)->getType())) + } else if (I->getOperand(0)->getType()->isVectorTy()) // Don't touch a vector-to-scalar bitcast. return 0; diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 20fda1a..a58124d 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -78,7 +78,7 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) { /// value is already around as a register, for example if it were inserted then /// extracted from the vector. static Value *FindScalarElement(Value *V, unsigned EltNo) { - assert(isa<VectorType>(V->getType()) && "Not looking at a vector?"); + assert(V->getType()->isVectorTy() && "Not looking at a vector?"); const VectorType *PTy = cast<VectorType>(V->getType()); unsigned Width = PTy->getNumElements(); if (EltNo >= Width) // Out of range access. @@ -322,7 +322,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// that computes V and the LHS value of the shuffle. static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, Value *&RHS) { - assert(isa<VectorType>(V->getType()) && + assert(V->getType()->isVectorTy() && (RHS == 0 || V->getType() == RHS->getType()) && "Invalid shuffle!"); unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 93b1961..af9ec5c 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -73,7 +73,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { /// from 'From' to 'To'. We don't want to convert from a legal to an illegal /// type for example, or from a smaller to a larger illegal type. 
bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const { - assert(isa<IntegerType>(From) && isa<IntegerType>(To)); + assert(From->isIntegerTy() && To->isIntegerTy()); // If we don't have TD, we don't know if the source/dest are legal. if (!TD) return false; @@ -158,7 +158,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { return ConstantExpr::getNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isInteger()) + if (C->getType()->getElementType()->isIntegerTy()) return ConstantExpr::getNeg(C); return 0; @@ -177,7 +177,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const { return ConstantExpr::getFNeg(C); if (ConstantVector *C = dyn_cast<ConstantVector>(V)) - if (C->getType()->getElementType()->isFloatingPoint()) + if (C->getType()->getElementType()->isFloatingPointTy()) return ConstantExpr::getFNeg(C); return 0; @@ -226,7 +226,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { if (isa<Constant>(TV) || isa<Constant>(FV)) { // Bool selects with constant operands can be folded to logical ops. - if (SI->getType()->isInteger(1)) return 0; + if (SI->getType()->isIntegerTy(1)) return 0; Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this); Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this); @@ -478,7 +478,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { bool EndsWithSequential = false; for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); I != E; ++I) - EndsWithSequential = !isa<StructType>(*I); + EndsWithSequential = !(*I)->isStructTy(); // Can we combine the two pointer arithmetics offsets? if (EndsWithSequential) { @@ -578,7 +578,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast const Type *SrcElTy = StrippedPtrTy->getElementType(); const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); - if (TD && isa<ArrayType>(SrcElTy) && + if (TD && SrcElTy->isArrayTy() && TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) == TD->getTypeAllocSize(ResElTy)) { Value *Idx[2]; @@ -596,7 +596,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // (where tmp = 8*tmp2) into: // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast - if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isInteger(8)) { + if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) { uint64_t ArrayEltSize = TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 3214c8c..8662a82 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -84,7 +84,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName, AI = MainFn->arg_begin(); // If the program looked at argc, have it look at the return value of the // init call instead. 
- if (!AI->getType()->isInteger(32)) { + if (!AI->getType()->isIntegerTy(32)) { Instruction::CastOps opcode; if (!AI->use_empty()) { opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true); diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp index cf5e8c0..ea8e5c3 100644 --- a/lib/Transforms/Scalar/ABCD.cpp +++ b/lib/Transforms/Scalar/ABCD.cpp @@ -505,7 +505,7 @@ void ABCD::executeABCD(Function &F) { continue; ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0)); - if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) + if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy()) continue; createConstraintCmpInst(ICI, TI); @@ -713,7 +713,7 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) { Value *V_op1 = ICI->getOperand(0); Value *V_op2 = ICI->getOperand(1); - if (!isa<IntegerType>(V_op1->getType())) + if (!V_op1->getType()->isIntegerTy()) return; Instruction *I_op1 = dyn_cast<Instruction>(V_op1); diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk new file mode 100644 index 0000000..dea9b8f --- /dev/null +++ b/lib/Transforms/Scalar/Android.mk @@ -0,0 +1,55 @@ +LOCAL_PATH:= $(call my-dir) + +transforms_scalar_SRC_FILES := \ + ABCD.cpp \ + ADCE.cpp \ + BasicBlockPlacement.cpp \ + CodeGenPrepare.cpp \ + ConstantProp.cpp \ + DCE.cpp \ + DeadStoreElimination.cpp \ + GEPSplitter.cpp \ + GVN.cpp \ + IndVarSimplify.cpp \ + JumpThreading.cpp \ + LICM.cpp \ + LoopDeletion.cpp \ + LoopIndexSplit.cpp \ + LoopRotation.cpp \ + LoopStrengthReduce.cpp \ + LoopUnrollPass.cpp \ + LoopUnswitch.cpp \ + MemCpyOptimizer.cpp \ + Reassociate.cpp \ + Reg2Mem.cpp \ + SCCP.cpp \ + SCCVN.cpp \ + Scalar.cpp \ + ScalarReplAggregates.cpp \ + SimplifyCFGPass.cpp \ + SimplifyHalfPowrLibCalls.cpp \ + SimplifyLibCalls.cpp \ + TailDuplication.cpp \ + TailRecursionElimination.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES) +LOCAL_MODULE:= libLLVMScalarOpts + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES) +LOCAL_MODULE:= libLLVMScalarOpts + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index fa60d3f..7ceda1f 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -32,7 +32,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" @@ -40,9 +39,6 @@ using namespace llvm; using namespace llvm::PatternMatch; -static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak", - cl::init(false), cl::Hidden); - namespace { class CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining @@ -301,6 +297,70 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } +/// FindReusablePredBB - Check all of the predecessors of the block DestPHI +/// lives in to see if there is a block that we can reuse as a critical 
edge +/// from TIBB. +static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { + BasicBlock *Dest = DestPHI->getParent(); + + /// TIPHIValues - This array is lazily computed to determine the values of + /// PHIs in Dest that TI would provide. + SmallVector<Value*, 32> TIPHIValues; + + /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. + unsigned TIBBEntryNo = 0; + + // Check to see if Dest has any blocks that can be used as a split edge for + // this terminator. + for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { + BasicBlock *Pred = DestPHI->getIncomingBlock(pi); + // To be usable, the pred has to end with an uncond branch to the dest. + BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); + if (!PredBr || !PredBr->isUnconditional()) + continue; + // Must be empty other than the branch and debug info. + BasicBlock::iterator I = Pred->begin(); + while (isa<DbgInfoIntrinsic>(I)) + I++; + if (&*I != PredBr) + continue; + // Cannot be the entry block; its label does not get emitted. + if (Pred == &Dest->getParent()->getEntryBlock()) + continue; + + // Finally, since we know that Dest has phi nodes in it, we have to make + // sure that jumping to Pred will have the same effect as going to Dest in + // terms of PHI values. + PHINode *PN; + unsigned PHINo = 0; + unsigned PredEntryNo = pi; + + bool FoundMatch = true; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { + if (PHINo == TIPHIValues.size()) { + if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) + TIBBEntryNo = PN->getBasicBlockIndex(TIBB); + TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); + } + + // If the PHI entry doesn't work, we can't use this pred. + if (PN->getIncomingBlock(PredEntryNo) != Pred) + PredEntryNo = PN->getBasicBlockIndex(Pred); + + if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { + FoundMatch = false; + break; + } + } + + // If we found a workable predecessor, change TI to branch to Succ. + if (FoundMatch) + return Pred; + } + return 0; +} + /// SplitEdgeNicely - Split the critical edge from TI to its specified /// successor if it will improve codegen. We only do this if the successor has @@ -315,13 +375,12 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, BasicBlock *Dest = TI->getSuccessor(SuccNum); assert(isa<PHINode>(Dest->begin()) && "This should only be called if Dest has a PHI!"); + PHINode *DestPHI = cast<PHINode>(Dest->begin()); // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) { + if (InvokeInst *Invoke = dyn_cast<InvokeInst>(TI)) if (Invoke->getSuccessor(1) == Dest) return; - } - // As a hack, never split backedges of loops. Even though the copy for any // PHIs inserted on the backedge would be dead for exits from the loop, we @@ -329,92 +388,16 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, if (BackEdges.count(std::make_pair(TIBB, Dest))) return; - if (!FactorCommonPreds) { - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - // To be usable, the pred has to end with an uncond branch to the dest. 
- BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa<DbgInfoIntrinsic>(I)) - I++; - if (dyn_cast<Instruction>(I) != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &(Dest->getParent()->getEntryBlock())) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - // If the PHI entry doesn't work, we can't use this pred. - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. - if (FoundMatch) { - ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); - if (PFI) - PFI->splitEdge(TIBB, Dest, Pred); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, Pred); - return; - } - } - - SplitCriticalEdge(TI, SuccNum, P, true); + if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { + ProfileInfo *PFI = P->getAnalysisIfAvailable<ProfileInfo>(); + if (PFI) + PFI->splitEdge(TIBB, Dest, ReuseBB); + Dest->removePredecessor(TIBB); + TI->setSuccessor(SuccNum, ReuseBB); return; } - PHINode *PN; - SmallVector<Value*, 8> TIPHIValues; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - SmallVector<BasicBlock*, 8> IdenticalPreds; - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - if (BackEdges.count(std::make_pair(Pred, Dest))) - continue; - if (PI == TIBB) - IdenticalPreds.push_back(Pred); - else { - bool Identical = true; - unsigned PHINo = 0; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - Identical = false; - break; - } - if (Identical) - IdenticalPreds.push_back(Pred); - } - } - - assert(!IdenticalPreds.empty()); - SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(), - ".critedge", P); + SplitCriticalEdge(TI, SuccNum, P, true); } @@ -629,7 +612,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // we'd end up sinking both muls. if (AddrMode.BaseReg) { Value *V = AddrMode.BaseReg; - if (isa<PointerType>(V->getType())) + if (V->getType()->isPointerTy()) V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); if (V->getType() != IntPtrTy) V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true, @@ -642,7 +625,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *V = AddrMode.ScaledReg; if (V->getType() == IntPtrTy) { // done. 
- } else if (isa<PointerType>(V->getType())) { + } else if (V->getType()->isPointerTy()) { V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt); } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth()) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 320afa1..09c01d3 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -44,8 +44,14 @@ namespace { virtual bool runOnFunction(Function &F) { bool Changed = false; + + DominatorTree &DT = getAnalysis<DominatorTree>(); + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - Changed |= runOnBasicBlock(*I); + // Only check non-dead blocks. Dead blocks may have strange pointer + // cycles that will confuse alias analysis. + if (DT.isReachableFromEntry(I)) + Changed |= runOnBasicBlock(*I); return Changed; } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 80e0027..fcb802a 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -662,11 +662,10 @@ namespace { bool runOnFunction(Function &F); public: static char ID; // Pass identification, replacement for typeid - explicit GVN(bool nopre = false, bool noloads = false) - : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { } + explicit GVN(bool noloads = false) + : FunctionPass(&ID), NoLoads(noloads), MD(0) { } private: - bool NoPRE; bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; @@ -674,6 +673,9 @@ namespace { ValueTable VN; DenseMap<BasicBlock*, ValueNumberScope*> localAvail; + // List of critical edges to be split between iterations. + SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; + // This transformation requires dominator postdominator info virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); @@ -701,14 +703,15 @@ namespace { Value *lookupNumber(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; + bool splitCriticalEdges(); }; char GVN::ID = 0; } // createGVNPass - The public interface to this file... -FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) { - return new GVN(NoPRE, NoLoads); +FunctionPass *llvm::createGVNPass(bool NoLoads) { + return new GVN(NoLoads); } static RegisterPass<GVN> X("gvn", @@ -836,9 +839,9 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. - if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) || - isa<StructType>(StoredVal->getType()) || - isa<ArrayType>(StoredVal->getType())) + if (LoadTy->isStructTy() || LoadTy->isArrayTy() || + StoredVal->getType()->isStructTy() || + StoredVal->getType()->isArrayTy()) return false; // The store has to be at least as big as the load. @@ -870,26 +873,26 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { - if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) { + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { // Pointer to Pointer -> use bitcast. return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); } // Convert source pointers to integers, which can be bitcast. 
- if (isa<PointerType>(StoredValTy)) { + if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } const Type *TypeToCastTo = LoadedTy; - if (isa<PointerType>(TypeToCastTo)) + if (TypeToCastTo->isPointerTy()) TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); if (StoredValTy != TypeToCastTo) StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); // Cast to pointer if the load needs a pointer type. - if (isa<PointerType>(LoadedTy)) + if (LoadedTy->isPointerTy()) StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); return StoredVal; @@ -901,13 +904,13 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); // Convert source pointers to integers, which can be manipulated. - if (isa<PointerType>(StoredValTy)) { + if (StoredValTy->isPointerTy()) { StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); } // Convert vectors and fp to integer, which can be manipulated. - if (!isa<IntegerType>(StoredValTy)) { + if (!StoredValTy->isIntegerTy()) { StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); } @@ -927,7 +930,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, return StoredVal; // If the result is a pointer, inttoptr. - if (isa<PointerType>(LoadedTy)) + if (LoadedTy->isPointerTy()) return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); // Otherwise, bitcast. @@ -989,7 +992,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr, const TargetData &TD) { // If the loaded or stored value is an first class array or struct, don't try // to transform them. We need to be able to bitcast to integer. - if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy)) + if (LoadTy->isStructTy() || LoadTy->isArrayTy()) return -1; int64_t StoreOffset = 0, LoadOffset = 0; @@ -1064,8 +1067,8 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StoreInst *DepSI, const TargetData &TD) { // Cannot handle reading from store of first-class aggregate yet. - if (isa<StructType>(DepSI->getOperand(0)->getType()) || - isa<ArrayType>(DepSI->getOperand(0)->getType())) + if (DepSI->getOperand(0)->getType()->isStructTy() || + DepSI->getOperand(0)->getType()->isArrayTy()) return -1; Value *StorePtr = DepSI->getPointerOperand(); @@ -1136,9 +1139,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. - if (isa<PointerType>(SrcVal->getType())) + if (SrcVal->getType()->isPointerTy()) SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp"); - if (!isa<IntegerType>(SrcVal->getType())) + if (!SrcVal->getType()->isIntegerTy()) SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8), "tmp"); @@ -1323,7 +1326,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. 
- if (isa<PointerType>(V->getType())) + if (V->getType()->isPointerTy()) for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); @@ -1491,8 +1494,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, if (isa<PHINode>(V)) V->takeName(LI); - if (isa<PointerType>(V->getType())) + if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); + VN.erase(LI); toErase.push_back(LI); NumGVNLoad++; return true; @@ -1538,11 +1542,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // at least one of the values is LI. Since this means that we won't be able // to eliminate LI even if we insert uses in the other predecessors, we will // end up increasing code size. Reject this by scanning for LI. - if (!EnableFullLoadPRE) { - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - if (ValuesPerBlock[i].isSimpleValue() && - ValuesPerBlock[i].getSimpleValue() == LI) + for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { + if (ValuesPerBlock[i].isSimpleValue() && + ValuesPerBlock[i].getSimpleValue() == LI) { + // Skip cases where LI is the only definition, even for EnableFullLoadPRE. + if (!EnableFullLoadPRE || e == 1) return false; + } } // FIXME: It is extremely unclear what this loop is doing, other than @@ -1576,6 +1582,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) FullyAvailableBlocks[UnavailableBlocks[i]] = false; + bool NeedToSplitEdges = false; for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; ++PI) { BasicBlock *Pred = *PI; @@ -1583,13 +1590,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, continue; } PredLoads[Pred] = 0; - // We don't currently handle critical edges :( + if (Pred->getTerminator()->getNumSuccessors() != 1) { - DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" - << Pred->getName() << "': " << *LI << '\n'); - return false; + if (isa<IndirectBrInst>(Pred->getTerminator())) { + DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB); + toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum)); + NeedToSplitEdges = true; } } + if (NeedToSplitEdges) + return false; // Decide whether PRE is profitable for this load. unsigned NumUnavailablePreds = PredLoads.size(); @@ -1623,13 +1637,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, NewInsts); } else { - Address.PHITranslateValue(LoadBB, UnavailablePred); + Address.PHITranslateValue(LoadBB, UnavailablePred, DT); LoadPtr = Address.getAddr(); - - // Make sure the value is live in the predecessor. - if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr)) - if (!DT->dominates(Inst->getParent(), UnavailablePred)) - LoadPtr = 0; } // If we couldn't find or insert a computation of this phi translated value, @@ -1697,6 +1706,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // Add the newly created load. ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred, NewLoad)); + MD->invalidateCachedPointerInfo(LoadPtr); + DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n'); } // Perform PHI construction. 
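As a sketch of the edge handling these GVN hunks introduce (illustrative only, not part of the patch; 'Pending' is a hypothetical stand-in for the toSplit list, and GetSuccessorNumber is the existing utility the patch itself calls), the decision for each predecessor of the load's block comes down to:

#include "llvm/Instructions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <utility>
using namespace llvm;

static bool deferLoadPREAcrossEdge(BasicBlock *Pred, BasicBlock *LoadBB,
        SmallVectorImpl<std::pair<TerminatorInst*, unsigned> > &Pending) {
  TerminatorInst *TI = Pred->getTerminator();
  if (TI->getNumSuccessors() == 1)
    return false;          // edge is not critical; the load can be inserted now
  if (isa<IndirectBrInst>(TI))
    return true;           // indirectbr edges cannot be split; PRE gives up here
  // Critical but splittable: queue it and retry on the next GVN iteration,
  // once splitCriticalEdges() has given the edge a block of its own.
  Pending.push_back(std::make_pair(TI, GetSuccessorNumber(Pred, LoadBB)));
  return true;
}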
@@ -1705,8 +1716,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI, LI->replaceAllUsesWith(V); if (isa<PHINode>(V)) V->takeName(LI); - if (isa<PointerType>(V->getType())) + if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); + VN.erase(LI); toErase.push_back(LI); NumPRELoad++; return true; @@ -1765,8 +1777,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // Replace the load! L->replaceAllUsesWith(AvailVal); - if (isa<PointerType>(AvailVal->getType())) + if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1810,8 +1823,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // Remove it! L->replaceAllUsesWith(StoredVal); - if (isa<PointerType>(StoredVal->getType())) + if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1839,8 +1853,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // Remove it! L->replaceAllUsesWith(AvailableVal); - if (isa<PointerType>(DepLI->getType())) + if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1851,6 +1866,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { // intervening stores, for example. if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1861,6 +1877,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) { if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); + VN.erase(L); toErase.push_back(L); NumGVNLoad++; return true; @@ -1891,6 +1908,10 @@ Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) { /// by inserting it into the appropriate sets bool GVN::processInstruction(Instruction *I, SmallVectorImpl<Instruction*> &toErase) { + // Ignore dbg info intrinsics. + if (isa<DbgInfoIntrinsic>(I)) + return false; + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { bool Changed = processLoad(LI, toErase); @@ -1939,7 +1960,7 @@ bool GVN::processInstruction(Instruction *I, if (constVal) { p->replaceAllUsesWith(constVal); - if (MD && isa<PointerType>(constVal->getType())) + if (MD && constVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(constVal); VN.erase(p); @@ -1960,7 +1981,7 @@ bool GVN::processInstruction(Instruction *I, // Remove it! VN.erase(I); I->replaceAllUsesWith(repl); - if (MD && isa<PointerType>(repl->getType())) + if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); toErase.push_back(I); return true; @@ -2000,6 +2021,8 @@ bool GVN::runOnFunction(Function& F) { while (ShouldContinue) { DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n"); ShouldContinue = iterateOnFunction(F); + if (splitCriticalEdges()) + ShouldContinue = true; Changed |= ShouldContinue; ++Iteration; } @@ -2066,7 +2089,6 @@ bool GVN::processBlock(BasicBlock *BB) { /// control flow patterns and attempts to perform simple PRE at the join point. 
bool GVN::performPRE(Function &F) { bool Changed = false; - SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit; DenseMap<BasicBlock*, Value*> predMap; for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { @@ -2137,14 +2159,7 @@ bool GVN::performPRE(Function &F) { // We can't do PRE safely on a critical edge, so instead we schedule // the edge to be split and perform the PRE the next time we iterate // on the function. - unsigned SuccNum = 0; - for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors(); - i != e; ++i) - if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { - SuccNum = i; - break; - } - + unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); continue; @@ -2200,7 +2215,7 @@ bool GVN::performPRE(Function &F) { localAvail[CurrentBlock]->table[ValNo] = Phi; CurInst->replaceAllUsesWith(Phi); - if (MD && isa<PointerType>(Phi->getType())) + if (MD && Phi->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); @@ -2212,11 +2227,23 @@ bool GVN::performPRE(Function &F) { } } - for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator - I = toSplit.begin(), E = toSplit.end(); I != E; ++I) - SplitCriticalEdge(I->first, I->second, this); + if (splitCriticalEdges()) + Changed = true; + + return Changed; +} - return Changed || toSplit.size(); +/// splitCriticalEdges - Split critical edges found during the previous +/// iteration that may enable further optimization. +bool GVN::splitCriticalEdges() { + if (toSplit.empty()) + return false; + do { + std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val(); + SplitCriticalEdge(Edge.first, Edge.second, this); + } while (!toSplit.empty()); + if (MD) MD->invalidateCachedPredecessors(); + return true; } /// iterateOnFunction - Executes one iteration of GVN diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index c54f596..cb563c3 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -103,11 +103,9 @@ namespace { BasicBlock *ExitingBlock, BranchInst *BI, SCEVExpander &Rewriter); - void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount, - SCEVExpander &Rewriter); + void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); - void RewriteIVExpressions(Loop *L, const Type *LargestType, - SCEVExpander &Rewriter); + void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); @@ -190,7 +188,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond"); - Instruction *OrigCond = cast<Instruction>(BI->getCondition()); + Value *OrigCond = BI->getCondition(); // It's tempting to use replaceAllUsesWith here to fully replace the old // comparison, but that's not immediately safe, since users of the old // comparison may not be dominated by the new comparison. Instead, just @@ -215,7 +213,6 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. void IndVarSimplify::RewriteLoopExitValues(Loop *L, - const SCEV *BackedgeTakenCount, SCEVExpander &Rewriter) { // Verify the input to the pass in already in LCSSA form. 
assert(L->isLCSSAForm()); @@ -241,15 +238,24 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, while ((PN = dyn_cast<PHINode>(BBI++))) { if (PN->use_empty()) continue; // dead use, don't replace it + + // SCEV only supports integer expressions for now. + if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy()) + continue; + + // It's necessary to tell ScalarEvolution about this explicitly so that + // it can walk the def-use list and forget all SCEVs, as it may not be + // watching the PHI itself. Once the new exit value is in place, there + // may not be a def-use connection between the loop and every instruction + // which got a SCEVAddRecExpr for that loop. + SE->forgetValue(PN); + // Iterate over all of the values in all the PHI nodes. for (unsigned i = 0; i != NumPreds; ++i) { // If the value being merged in is not integer or is not defined // in the loop, skip it. Value *InVal = PN->getIncomingValue(i); - if (!isa<Instruction>(InVal) || - // SCEV only supports integer expressions for now. - (!isa<IntegerType>(InVal->getType()) && - !isa<PointerType>(InVal->getType()))) + if (!isa<Instruction>(InVal)) continue; // If this pred is for a subloop, not L itself, skip it. @@ -349,7 +355,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // the current expressions. // if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount)) - RewriteLoopExitValues(L, BackedgeTakenCount, Rewriter); + RewriteLoopExitValues(L, Rewriter); // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. @@ -364,37 +370,32 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { if (ExitingBlock) NeedCannIV = true; } - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - const Type *Ty = SE->getEffectiveSCEVType(Stride->getType()); + for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + const Type *Ty = + SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType()); if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) LargestType = Ty; - - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - - if (!SI->second->Users.empty()) - NeedCannIV = true; + NeedCannIV = true; } // Now that we know the largest of the induction variable expressions // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; if (NeedCannIV) { - // Check to see if the loop already has a canonical-looking induction - // variable. If one is present and it's wider than the planned canonical - // induction variable, temporarily remove it, so that the Rewriter - // doesn't attempt to reuse it. - PHINode *OldCannIV = L->getCanonicalInductionVariable(); - if (OldCannIV) { + // Check to see if the loop already has any canonical-looking induction + // variables. If any are present and wider than the planned canonical + // induction variable, temporarily remove them, so that the Rewriter + // doesn't attempt to reuse them. 
+ SmallVector<PHINode *, 2> OldCannIVs; + while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) { if (SE->getTypeSizeInBits(OldCannIV->getType()) > SE->getTypeSizeInBits(LargestType)) OldCannIV->removeFromParent(); else - OldCannIV = 0; + break; + OldCannIVs.push_back(OldCannIV); } IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType); @@ -404,17 +405,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n'); // Now that the official induction variable is established, reinsert - // the old canonical-looking variable after it so that the IR remains - // consistent. It will be deleted as part of the dead-PHI deletion at + // any old canonical-looking variables after it so that the IR remains + // consistent. They will be deleted as part of the dead-PHI deletion at // the end of the pass. - if (OldCannIV) - OldCannIV->insertAfter(cast<Instruction>(IndVar)); + while (!OldCannIVs.empty()) { + PHINode *OldCannIV = OldCannIVs.pop_back_val(); + OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI()); + } } // If we have a trip count expression, rewrite the loop's exit condition // using it. We can currently only handle loops with a single exit. ICmpInst *NewICmp = 0; - if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) { + if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && + !BackedgeTakenCount->isZero() && + ExitingBlock) { assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); // Can't rewrite non-branch yet. @@ -424,7 +429,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { } // Rewrite IV-derived expressions. Clears the rewriter cache. - RewriteIVExpressions(L, LargestType, Rewriter); + RewriteIVExpressions(L, Rewriter); // The Rewriter may not be used from this point on. @@ -444,8 +449,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; } -void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, - SCEVExpander &Rewriter) { +void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { SmallVector<WeakVH, 16> DeadInsts; // Rewrite all induction variable expressions in terms of the canonical @@ -455,72 +459,64 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // add the offsets to the primary induction variable and cast, avoiding // the need for the code evaluation methods to insert induction variables // of different sizes. - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - ilist<IVStrideUse> &List = SI->second->Users; - for (ilist<IVStrideUse>::iterator UI = List.begin(), - E = List.end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - const Type *UseTy = Op->getType(); - Instruction *User = UI->getUser(); - - // Compute the final addrec to expand into code. - const SCEV *AR = IU->getReplacementExpr(*UI); - - // Evaluate the expression out of the loop, if possible. 
- if (!L->contains(UI->getUser())) { - const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); - if (ExitVal->isLoopInvariant(L)) - AR = ExitVal; - } + for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { + const SCEV *Stride = UI->getStride(); + Value *Op = UI->getOperandValToReplace(); + const Type *UseTy = Op->getType(); + Instruction *User = UI->getUser(); + + // Compute the final addrec to expand into code. + const SCEV *AR = IU->getReplacementExpr(*UI); + + // Evaluate the expression out of the loop, if possible. + if (!L->contains(UI->getUser())) { + const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop()); + if (ExitVal->isLoopInvariant(L)) + AR = ExitVal; + } - // FIXME: It is an extremely bad idea to indvar substitute anything more - // complex than affine induction variables. Doing so will put expensive - // polynomial evaluations inside of the loop, and the str reduction pass - // currently can only reduce affine polynomials. For now just disable - // indvar subst on anything more complex than an affine addrec, unless - // it can be expanded to a trivial value. - if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) - continue; + // FIXME: It is an extremely bad idea to indvar substitute anything more + // complex than affine induction variables. Doing so will put expensive + // polynomial evaluations inside of the loop, and the str reduction pass + // currently can only reduce affine polynomials. For now just disable + // indvar subst on anything more complex than an affine addrec, unless + // it can be expanded to a trivial value. + if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + continue; - // Determine the insertion point for this user. By default, insert - // immediately before the user. The SCEVExpander class will automatically - // hoist loop invariants out of the loop. For PHI nodes, there may be - // multiple uses, so compute the nearest common dominator for the - // incoming blocks. - Instruction *InsertPt = User; - if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingValue(i) == Op) { - if (InsertPt == User) - InsertPt = PHI->getIncomingBlock(i)->getTerminator(); - else - InsertPt = - DT->findNearestCommonDominator(InsertPt->getParent(), - PHI->getIncomingBlock(i)) - ->getTerminator(); - } - - // Now expand it into actual Instructions and patch it into place. - Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); - - // Patch the new value into place. - if (Op->hasName()) - NewVal->takeName(Op); - User->replaceUsesOfWith(Op, NewVal); - UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); - ++NumRemoved; - Changed = true; - - // The old value may be dead now. - DeadInsts.push_back(Op); - } + // Determine the insertion point for this user. By default, insert + // immediately before the user. The SCEVExpander class will automatically + // hoist loop invariants out of the loop. For PHI nodes, there may be + // multiple uses, so compute the nearest common dominator for the + // incoming blocks. 
+ Instruction *InsertPt = User; + if (PHINode *PHI = dyn_cast<PHINode>(InsertPt)) + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) + if (PHI->getIncomingValue(i) == Op) { + if (InsertPt == User) + InsertPt = PHI->getIncomingBlock(i)->getTerminator(); + else + InsertPt = + DT->findNearestCommonDominator(InsertPt->getParent(), + PHI->getIncomingBlock(i)) + ->getTerminator(); + } + + // Now expand it into actual Instructions and patch it into place. + Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + + // Patch the new value into place. + if (Op->hasName()) + NewVal->takeName(Op); + User->replaceUsesOfWith(Op, NewVal); + UI->setOperandValToReplace(NewVal); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + ++NumRemoved; + Changed = true; + + // The old value may be dead now. + DeadInsts.push_back(Op); } // Clear the rewriter cache, because values that are in the rewriter's cache @@ -598,8 +594,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { } } -/// Return true if it is OK to use SIToFPInst for an inducation variable -/// with given inital and exit values. +/// Return true if it is OK to use SIToFPInst for an induction variable +/// with given initial and exit values. static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV, uint64_t intIV, uint64_t intEV) { @@ -652,7 +648,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { if (!convertToInt(InitValue->getValueAPF(), &newInitValue)) return; - // Check IV increment. Reject this PH if increement operation is not + // Check IV increment. Reject this PH if increment operation is not // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge)); @@ -688,7 +684,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { if (BI->getCondition() != EC) return; } - // Find exit value. If exit value can not be represented as an interger then + // Find exit value. If exit value can not be represented as an integer then // do not handle this floating point PH. ConstantFP *EV = NULL; unsigned EVIndex = 1; @@ -750,11 +746,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(), NewPred, LHS, RHS, EC->getName()); - // In the following deltions, PH may become dead and may be deleted. + // In the following deletions, PH may become dead and may be deleted. // Use a WeakVH to observe whether this happens. WeakVH WeakPH = PH; - // Delete old, floating point, exit comparision instruction. + // Delete old, floating point, exit comparison instruction. NewEC->takeName(EC); EC->replaceAllUsesWith(NewEC); RecursivelyDeleteTriviallyDeadInstructions(EC); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 3eff3d8..a6489ec 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -201,7 +201,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { if (isa<DbgInfoIntrinsic>(I)) continue; // If this is a pointer->pointer bitcast, it is free. - if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) + if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) continue; // All other instructions count for at least one unit. 
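Condensed for reference (a sketch, not the real function), the per-instruction weights that the getJumpThreadDuplicationCost hunks here (one above, one just below) adjust come down to: debug intrinsics and pointer-to-pointer bitcasts are free, ordinary calls are charged extra, and everything else costs one unit.

#include "llvm/IntrinsicInst.h"
using namespace llvm;

static unsigned roughDuplicationCost(const Instruction *I) {
  if (isa<DbgInfoIntrinsic>(I))
    return 0;                         // debug info produces no machine code
  if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
    return 0;                         // pointer->pointer bitcasts are no-ops
  unsigned Cost = 1;                  // baseline unit for any other instruction
  if (const CallInst *CI = dyn_cast<CallInst>(I)) {
    if (!isa<IntrinsicInst>(CI))
      Cost += 3;                      // real calls are comparatively expensive
    else if (!CI->getType()->isVectorTy())
      Cost += 1;                      // non-vector intrinsics expand to a few ops
  }
  return Cost;
}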
@@ -214,7 +214,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { if (const CallInst *CI = dyn_cast<CallInst>(I)) { if (!isa<IntrinsicInst>(CI)) Size += 3; - else if (!isa<VectorType>(CI->getType())) + else if (!CI->getType()->isVectorTy()) Size += 1; } } @@ -336,13 +336,18 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ else InterestingVal = ConstantInt::getFalse(I->getContext()); - // Scan for the sentinel. + // Scan for the sentinel. If we find an undef, force it to the + // interesting value: x|undef -> true and x&undef -> false. for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) - if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) + if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) { Result.push_back(LHSVals[i]); + Result.back().first = InterestingVal; + } for (unsigned i = 0, e = RHSVals.size(); i != e; ++i) - if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) + if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) { Result.push_back(RHSVals[i]); + Result.back().first = InterestingVal; + } return !Result.empty(); } @@ -400,7 +405,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){ // If comparing a live-in value against a constant, see if we know the // live-in value on any predecessors. if (LVI && isa<Constant>(Cmp->getOperand(1)) && - Cmp->getType()->isInteger() && // Not vector compare. + Cmp->getType()->isIntegerTy() && // Not vector compare. (!isa<Instruction>(Cmp->getOperand(0)) || cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) { Constant *RHSCst = cast<Constant>(Cmp->getOperand(1)); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 81f9ae6..d7ace34 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -678,7 +678,7 @@ void LICM::PromoteValuesInLoop() { // If we are promoting a pointer value, update alias information for the // inserted load. Value *LoadValue = 0; - if (isa<PointerType>(cast<PointerType>(Ptr->getType())->getElementType())) { + if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) { // Locate a load or store through the pointer, and assign the same value // to LI as we are loading or storing. Since we know that the value is // stored in this loop, this will always succeed. @@ -751,7 +751,7 @@ void LICM::PromoteValuesInLoop() { LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos); // If this is a pointer type, update alias info appropriately. - if (isa<PointerType>(LI->getType())) + if (LI->getType()->isPointerTy()) CurAST->copyValue(PointerValueNumbers[PVN++], LI); // Store into the memory we promoted. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a5611ff..f920dca 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -17,6 +17,40 @@ // available on the target, and it performs a variety of other optimizations // related to loop induction variables. // +// Terminology note: this code has a lot of handling for "post-increment" or +// "post-inc" users. This is not talking about post-increment addressing modes; +// it is instead talking about code like this: +// +// %i = phi [ 0, %entry ], [ %i.next, %latch ] +// ... +// %i.next = add %i, 1 +// %c = icmp eq %i.next, %n +// +// The SCEV for %i is {0,+,1}<%L>. 
The SCEV for %i.next is {1,+,1}<%L>, however +// it's useful to think about these as the same register, with some uses using +// the value of the register before the add and some using // it after. In this +// example, the icmp is a post-increment user, since it uses %i.next, which is +// the value of the induction variable after the increment. The other common +// case of post-increment users is users outside the loop. +// +// TODO: More sophistication in the way Formulae are generated and filtered. +// +// TODO: Handle multiple loops at a time. +// +// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr +// instead of a GlobalValue? +// +// TODO: When truncation is free, truncate ICmp users' operands to make it a +// smaller encoding (on x86 at least). +// +// TODO: When a negated register is used by an add (such as in a list of +// multiple base registers, or as the increment expression in an addrec), +// we may not actually need both reg and (-1 * reg) in registers; the +// negation can be implemented by using a sub instead of an add. The +// lack of support for taking this into consideration when making +// register pressure decisions is partly worked around by the "Special" +// use kind. +// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "loop-reduce" @@ -26,208 +60,434 @@ #include "llvm/IntrinsicInst.h" #include "llvm/DerivedTypes.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include <algorithm> using namespace llvm; -STATISTIC(NumReduced , "Number of IV uses strength reduced"); -STATISTIC(NumInserted, "Number of PHIs inserted"); -STATISTIC(NumVariable, "Number of PHIs with variable strides"); -STATISTIC(NumEliminated, "Number of strides eliminated"); -STATISTIC(NumShadow, "Number of Shadow IVs optimized"); -STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses"); -STATISTIC(NumLoopCond, "Number of loop terminating conds optimized"); -STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero"); +namespace { + +/// RegSortData - This class holds data which is used to order reuse candidates. +class RegSortData { +public: + /// UsedByIndices - This represents the set of LSRUse indices which reference + /// a particular register. + SmallBitVector UsedByIndices; + + RegSortData() {} + + void print(raw_ostream &OS) const; + void dump() const; +}; -static cl::opt<bool> EnableFullLSRMode("enable-full-lsr", - cl::init(false), - cl::Hidden); +} + +void RegSortData::print(raw_ostream &OS) const { + OS << "[NumUses=" << UsedByIndices.count() << ']'; +} + +void RegSortData::dump() const { + print(errs()); errs() << '\n'; +} namespace { - struct BasedUser; +/// RegUseTracker - Map register candidates to information about how they are +/// used. 
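/// (Illustrative usage, not part of the patch: after CountRegister(R, 0) and
/// CountRegister(R, 2), getUsedByIndices(R) has bits 0 and 2 set, and
/// isRegUsedByUsesOtherThan(R, 0) returns true because LSRUse index 2 also
/// references R.)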
+class RegUseTracker { + typedef DenseMap<const SCEV *, RegSortData> RegUsesTy; - /// IVInfo - This structure keeps track of one IV expression inserted during - /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as - /// well as the PHI node and increment value created for rewrite. - struct IVExpr { - const SCEV *Stride; - const SCEV *Base; - PHINode *PHI; + RegUsesTy RegUses; + SmallVector<const SCEV *, 16> RegSequence; - IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi) - : Stride(stride), Base(base), PHI(phi) {} - }; +public: + void CountRegister(const SCEV *Reg, size_t LUIdx); + + bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; + + const SmallBitVector &getUsedByIndices(const SCEV *Reg) const; + + void clear(); + + typedef SmallVectorImpl<const SCEV *>::iterator iterator; + typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator; + iterator begin() { return RegSequence.begin(); } + iterator end() { return RegSequence.end(); } + const_iterator begin() const { return RegSequence.begin(); } + const_iterator end() const { return RegSequence.end(); } +}; + +} - /// IVsOfOneStride - This structure keeps track of all IV expression inserted - /// during StrengthReduceStridedIVUsers for a particular stride of the IV. - struct IVsOfOneStride { - std::vector<IVExpr> IVs; +void +RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { + std::pair<RegUsesTy::iterator, bool> Pair = + RegUses.insert(std::make_pair(Reg, RegSortData())); + RegSortData &RSD = Pair.first->second; + if (Pair.second) + RegSequence.push_back(Reg); + RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1)); + RSD.UsedByIndices.set(LUIdx); +} + +bool +RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const { + if (!RegUses.count(Reg)) return false; + const SmallBitVector &UsedByIndices = + RegUses.find(Reg)->second.UsedByIndices; + int i = UsedByIndices.find_first(); + if (i == -1) return false; + if ((size_t)i != LUIdx) return true; + return UsedByIndices.find_next(i) != -1; +} + +const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const { + RegUsesTy::const_iterator I = RegUses.find(Reg); + assert(I != RegUses.end() && "Unknown register!"); + return I->second.UsedByIndices; +} + +void RegUseTracker::clear() { + RegUses.clear(); + RegSequence.clear(); +} + +namespace { + +/// Formula - This class holds information that describes a formula for +/// computing satisfying a use. It may include broken-out immediates and scaled +/// registers. +struct Formula { + /// AM - This is used to represent complex addressing, as well as other kinds + /// of interesting uses. + TargetLowering::AddrMode AM; + + /// BaseRegs - The list of "base" registers for this use. When this is + /// non-empty, AM.HasBaseReg should be set to true. + SmallVector<const SCEV *, 2> BaseRegs; - void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) { - IVs.push_back(IVExpr(Stride, Base, PHI)); + /// ScaledReg - The 'scaled' register for this use. This should be non-null + /// when AM.Scale is not zero. 
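  /// (Illustrative only, not part of the patch: an address computed as
  ///   %base + 4*%idx + 16
  /// would be described by BaseRegs = { SCEV of %base }, ScaledReg = SCEV of
  /// %idx with AM.Scale = 4, and AM.BaseOffs = 16, i.e. the
  /// "16 + reg(%base) + 4*reg(%idx)" form that Formula::print renders below.)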
+ const SCEV *ScaledReg; + + Formula() : ScaledReg(0) {} + + void InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + + unsigned getNumRegs() const; + const Type *getType() const; + + bool referencesReg(const SCEV *S) const; + bool hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const; + + void print(raw_ostream &OS) const; + void dump() const; +}; + +} + +/// DoInitialMatch - Recursion helper for InitialMatch. +static void DoInitialMatch(const SCEV *S, Loop *L, + SmallVectorImpl<const SCEV *> &Good, + SmallVectorImpl<const SCEV *> &Bad, + ScalarEvolution &SE, DominatorTree &DT) { + // Collect expressions which properly dominate the loop header. + if (S->properlyDominates(L->getHeader(), &DT)) { + Good.push_back(S); + return; + } + + // Look at add operands. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + DoInitialMatch(*I, L, Good, Bad, SE, DT); + return; + } + + // Look at addrec operands. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + if (!AR->getStart()->isZero()) { + DoInitialMatch(AR->getStart(), L, Good, Bad, SE, DT); + DoInitialMatch(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), + L, Good, Bad, SE, DT); + return; } - }; - class LoopStrengthReduce : public LoopPass { - IVUsers *IU; - ScalarEvolution *SE; - bool Changed; - - /// IVsByStride - Keep track of all IVs that have been inserted for a - /// particular stride. - std::map<const SCEV *, IVsOfOneStride> IVsByStride; - - /// DeadInsts - Keep track of instructions we may have made dead, so that - /// we can remove them after we are done working. - SmallVector<WeakVH, 16> DeadInsts; - - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. - const TargetLowering *TLI; - - public: - static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = NULL) : - LoopPass(&ID), TLI(tli) {} - - bool runOnLoop(Loop *L, LPPassManager &LPM); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - // We split critical edges, so we change the CFG. However, we do update - // many analyses if they are around. - AU.addPreservedID(LoopSimplifyID); - AU.addPreserved("loops"); - AU.addPreserved("domfrontier"); - AU.addPreserved("domtree"); - - AU.addRequiredID(LoopSimplifyID); - AU.addRequired<ScalarEvolution>(); - AU.addPreserved<ScalarEvolution>(); - AU.addRequired<IVUsers>(); - AU.addPreserved<IVUsers>(); + // Handle a multiplication by -1 (negation) if it didn't fold. 
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) + if (Mul->getOperand(0)->isAllOnesValue()) { + SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end()); + const SCEV *NewMul = SE.getMulExpr(Ops); + + SmallVector<const SCEV *, 4> MyGood; + SmallVector<const SCEV *, 4> MyBad; + DoInitialMatch(NewMul, L, MyGood, MyBad, SE, DT); + const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue( + SE.getEffectiveSCEVType(NewMul->getType()))); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(), + E = MyGood.end(); I != E; ++I) + Good.push_back(SE.getMulExpr(NegOne, *I)); + for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(), + E = MyBad.end(); I != E; ++I) + Bad.push_back(SE.getMulExpr(NegOne, *I)); + return; } - private: - void OptimizeIndvars(Loop *L); - - /// OptimizeLoopTermCond - Change loop terminating condition to use the - /// postinc iv when possible. - void OptimizeLoopTermCond(Loop *L); - - /// OptimizeShadowIV - If IV is used in a int-to-float cast - /// inside the loop then try to eliminate the cast opeation. - void OptimizeShadowIV(Loop *L); - - /// OptimizeMax - Rewrite the loop's terminating condition - /// if it uses a max computation. - ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse); - - /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for - /// deciding when to exit the loop is used only for that purpose, try to - /// rearrange things so it counts down to a test against zero. - bool OptimizeLoopCountIV(Loop *L); - bool OptimizeLoopCountIVOfStride(const SCEV* &Stride, - IVStrideUse* &CondUse, Loop *L); - - /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a - /// single stride of IV. All of the users may have different starting - /// values, and this may not be the only stride. 
- void StrengthReduceIVUsersOfStride(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L); - void StrengthReduceIVUsers(Loop *L); - - ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV* &CondStride, - bool PostPass = false); - - bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, - const SCEV* &CondStride); - bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); - const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *, - IVExpr&, const Type*, - const std::vector<BasedUser>& UsersToProcess); - bool ValidScale(bool, int64_t, - const std::vector<BasedUser>& UsersToProcess); - bool ValidOffset(bool, int64_t, int64_t, - const std::vector<BasedUser>& UsersToProcess); - const SCEV *CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess); - bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc); - bool ShouldUseFullStrengthReductionMode( - const std::vector<BasedUser> &UsersToProcess, - const Loop *L, - bool AllUsesAreAddresses, - const SCEV *Stride); - void PrepareToStrengthReduceFully( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - const Loop *L, - SCEVExpander &PreheaderRewriter); - void PrepareToStrengthReduceFromSmallerStride( - std::vector<BasedUser> &UsersToProcess, - Value *CommonBaseV, - const IVExpr &ReuseIV, - Instruction *PreInsertPt); - void PrepareToStrengthReduceWithNewPhi( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - Value *CommonBaseV, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &PreheaderRewriter); - - void DeleteTriviallyDeadInstructions(); - }; + // Ok, we can't do anything interesting. Just stuff the whole thing into a + // register and hope for the best. + Bad.push_back(S); } -char LoopStrengthReduce::ID = 0; -static RegisterPass<LoopStrengthReduce> -X("loop-reduce", "Loop Strength Reduction"); +/// InitialMatch - Incorporate loop-variant parts of S into this Formula, +/// attempting to keep all loop-invariant and loop-computable values in a +/// single base register. +void Formula::InitialMatch(const SCEV *S, Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + SmallVector<const SCEV *, 4> Good; + SmallVector<const SCEV *, 4> Bad; + DoInitialMatch(S, L, Good, Bad, SE, DT); + if (!Good.empty()) { + BaseRegs.push_back(SE.getAddExpr(Good)); + AM.HasBaseReg = true; + } + if (!Bad.empty()) { + BaseRegs.push_back(SE.getAddExpr(Bad)); + AM.HasBaseReg = true; + } +} -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +/// getNumRegs - Return the total number of register operands used by this +/// formula. This does not include register uses implied by non-constant +/// addrec strides. +unsigned Formula::getNumRegs() const { + return !!ScaledReg + BaseRegs.size(); } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. -void LoopStrengthReduce::DeleteTriviallyDeadInstructions() { - while (!DeadInsts.empty()) { - Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); +/// getType - Return the type of this formula, if it has one, or null +/// otherwise. This type is meaningless except for the bit size. 
+const Type *Formula::getType() const { + return !BaseRegs.empty() ? BaseRegs.front()->getType() : + ScaledReg ? ScaledReg->getType() : + AM.BaseGV ? AM.BaseGV->getType() : + 0; +} - if (I == 0 || !isInstructionTriviallyDead(I)) - continue; +/// referencesReg - Test if this formula references the given register. +bool Formula::referencesReg(const SCEV *S) const { + return S == ScaledReg || + std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); +} - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *U = dyn_cast<Instruction>(*OI)) { - *OI = 0; - if (U->use_empty()) - DeadInsts.push_back(U); +/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers +/// which are used by uses other than the use with the given index. +bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, + const RegUseTracker &RegUses) const { + if (ScaledReg) + if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx)) + return true; + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) + if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx)) + return true; + return false; +} + +void Formula::print(raw_ostream &OS) const { + bool First = true; + if (AM.BaseGV) { + if (!First) OS << " + "; else First = false; + WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); + } + if (AM.BaseOffs != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.BaseOffs; + } + for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), + E = BaseRegs.end(); I != E; ++I) { + if (!First) OS << " + "; else First = false; + OS << "reg(" << **I << ')'; + } + if (AM.Scale != 0) { + if (!First) OS << " + "; else First = false; + OS << AM.Scale << "*reg("; + if (ScaledReg) + OS << *ScaledReg; + else + OS << "<unknown>"; + OS << ')'; + } +} + +void Formula::dump() const { + print(errs()); errs() << '\n'; +} + +/// isAddRecSExtable - Return true if the given addrec can be sign-extended +/// without changing its value. +static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { + const Type *WideTy = + IntegerType::get(SE.getContext(), + SE.getTypeSizeInBits(AR->getType()) + 1); + return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); +} + +/// isAddSExtable - Return true if the given add can be sign-extended +/// without changing its value. +static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { + const Type *WideTy = + IntegerType::get(SE.getContext(), + SE.getTypeSizeInBits(A->getType()) + 1); + return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy)); +} + +/// isMulSExtable - Return true if the given add can be sign-extended +/// without changing its value. +static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) { + const Type *WideTy = + IntegerType::get(SE.getContext(), + SE.getTypeSizeInBits(A->getType()) + 1); + return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy)); +} + +/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined +/// and if the remainder is known to be zero, or null otherwise. If +/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified +/// to Y, ignoring that the multiplication may overflow, which is useful when +/// the result will be used in a context where the most significant bits are +/// ignored. 
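/// (Worked examples, not text from the patch, matching the implementation
/// below: for integer constants, getExactSDiv of 12 by 4 yields the constant
/// 3, while 7 by 2 yields null because the remainder is nonzero; and
/// {8,+,4}<%L> divided by 4 distributes to {2,+,1}<%L> when the addrec is
/// sign-extendable or IgnoreSignificantBits is set.)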
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, + ScalarEvolution &SE, + bool IgnoreSignificantBits = false) { + // Handle the trivial case, which works for any SCEV type. + if (LHS == RHS) + return SE.getIntegerSCEV(1, LHS->getType()); + + // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some + // folding. + if (RHS->isAllOnesValue()) + return SE.getMulExpr(LHS, RHS); + + // Check for a division of a constant by a constant. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) { + const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS); + if (!RC) + return 0; + if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0) + return 0; + return SE.getConstant(C->getValue()->getValue() + .sdiv(RC->getValue()->getValue())); + } + + // Distribute the sdiv over addrec operands, if the addrec doesn't overflow. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) { + if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { + const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, + IgnoreSignificantBits); + if (!Start) return 0; + const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, + IgnoreSignificantBits); + if (!Step) return 0; + return SE.getAddRecExpr(Start, Step, AR->getLoop()); + } + } + + // Distribute the sdiv over add operands, if the add doesn't overflow. + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) { + if (IgnoreSignificantBits || isAddSExtable(Add, SE)) { + SmallVector<const SCEV *, 8> Ops; + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + const SCEV *Op = getExactSDiv(*I, RHS, SE, + IgnoreSignificantBits); + if (!Op) return 0; + Ops.push_back(Op); } + return SE.getAddExpr(Ops); + } + } - I->eraseFromParent(); - Changed = true; + // Check for a multiply operand that we can pull RHS out of. + if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) + if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) { + SmallVector<const SCEV *, 4> Ops; + bool Found = false; + for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end(); + I != E; ++I) { + if (!Found) + if (const SCEV *Q = getExactSDiv(*I, RHS, SE, + IgnoreSignificantBits)) { + Ops.push_back(Q); + Found = true; + continue; + } + Ops.push_back(*I); + } + return Found ? SE.getMulExpr(Ops) : 0; + } + + // Otherwise we don't know. + return 0; +} + +/// ExtractImmediate - If S involves the addition of a constant integer value, +/// return that integer value, and mutate S to point to a new SCEV with that +/// value excluded. +static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { + if (C->getValue()->getValue().getMinSignedBits() <= 64) { + S = SE.getIntegerSCEV(0, C->getType()); + return C->getValue()->getSExtValue(); + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + int64_t Result = ExtractImmediate(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; + } + return 0; +} + +/// ExtractSymbol - If S involves the addition of a GlobalValue address, +/// return that symbol, and mutate S to point to a new SCEV with that +/// value excluded. 
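A rough standalone analogue of ExtractImmediate over a plain list of addends (Addend and extractImmediate are invented names; the real code instead recurses through SCEV add and addrec nodes): the constant part is peeled off and returned so it can later live in an instruction's immediate field, and the symbolic remainder is left in the expression.

#include <cassert>
#include <cstdint>
#include <vector>

// Toy addend: either a constant or a symbolic value that cannot be folded.
struct Addend { bool IsConstant; int64_t Value; const char *Symbol; };

// Rough analogue of ExtractImmediate: remove the constant addends from S,
// return their sum, and leave the non-constant part of the expression behind.
static int64_t extractImmediate(std::vector<Addend> &S) {
  int64_t Imm = 0;
  std::vector<Addend> Rest;
  for (size_t i = 0, e = S.size(); i != e; ++i) {
    if (S[i].IsConstant)
      Imm += S[i].Value;
    else
      Rest.push_back(S[i]);
  }
  S.swap(Rest);
  return Imm;
}

int main() {
  // Models extracting 16 from the expression {16 + %x}: afterwards only the
  // symbolic part remains and 16 is available for an immediate operand.
  Addend C = { true, 16, 0 };
  Addend X = { false, 0, "%x" };
  std::vector<Addend> S;
  S.push_back(C);
  S.push_back(X);
  int64_t Imm = extractImmediate(S);
  assert(Imm == 16 && S.size() == 1);
  return 0;
}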
+static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { + if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) { + S = SE.getIntegerSCEV(0, GV->getType()); + return GV; + } + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); + S = SE.getAddExpr(NewOps); + return Result; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); + GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); + S = SE.getAddRecExpr(NewOps, AR->getLoop()); + return Result; } + return 0; } /// isAddressUse - Returns true if the specified instruction is using the @@ -276,1776 +536,833 @@ static const Type *getAccessType(const Instruction *Inst) { break; } } - return AccessTy; -} -namespace { - /// BasedUser - For a particular base value, keep information about how we've - /// partitioned the expression so far. - struct BasedUser { - /// Base - The Base value for the PHI node that needs to be inserted for - /// this use. As the use is processed, information gets moved from this - /// field to the Imm field (below). BasedUser values are sorted by this - /// field. - const SCEV *Base; - - /// Inst - The instruction using the induction variable. - Instruction *Inst; - - /// OperandValToReplace - The operand value of Inst to replace with the - /// EmittedBase. - Value *OperandValToReplace; - - /// Imm - The immediate value that should be added to the base immediately - /// before Inst, because it will be folded into the imm field of the - /// instruction. This is also sometimes used for loop-variant values that - /// must be added inside the loop. - const SCEV *Imm; - - /// Phi - The induction variable that performs the striding that - /// should be used for this user. - PHINode *Phi; - - // isUseOfPostIncrementedValue - True if this should use the - // post-incremented version of this IV, not the preincremented version. - // This can only be set in special cases, such as the terminating setcc - // instruction for a loop and uses outside the loop that are dominated by - // the loop. - bool isUseOfPostIncrementedValue; - - BasedUser(IVStrideUse &IVSU, ScalarEvolution *se) - : Base(IVSU.getOffset()), Inst(IVSU.getUser()), - OperandValToReplace(IVSU.getOperandValToReplace()), - Imm(se->getIntegerSCEV(0, Base->getType())), - isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {} - - // Once we rewrite the code to insert the new IVs we want, update the - // operands of Inst to use the new expression 'NewBase', with 'Imm' added - // to it. - void RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *InsertPt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE); - - Value *InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE); - void dump() const; - }; -} + // All pointers have the same requirements, so canonicalize them to an + // arbitrary pointer type to minimize variation. 
+ if (const PointerType *PTy = dyn_cast<PointerType>(AccessTy)) + AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()); -void BasedUser::dump() const { - dbgs() << " Base=" << *Base; - dbgs() << " Imm=" << *Imm; - dbgs() << " Inst: " << *Inst; + return AccessTy; } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *NewBase, - const Type *Ty, - SCEVExpander &Rewriter, - Instruction *IP, - ScalarEvolution *SE) { - Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP); - - // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to - // re-analyze it. - const SCEV *NewValSCEV = SE->getUnknown(Base); - - // Always emit the immediate into the same block as the user. - NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); - - return Rewriter.expandCodeFor(NewValSCEV, Ty, IP); -} +/// DeleteTriviallyDeadInstructions - If any of the instructions is the +/// specified set are trivially dead, delete them and see if this makes any of +/// their operands subsequently dead. +static bool +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { + bool Changed = false; + while (!DeadInsts.empty()) { + Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()); -// Once we rewrite the code to insert the new IVs we want, update the -// operands of Inst to use the new expression 'NewBase', with 'Imm' added -// to it. NewBasePt is the last instruction which contributes to the -// value of NewBase in the case that it's a diffferent instruction from -// the PHI that NewBase is computed from, or null otherwise. -// -void BasedUser::RewriteInstructionToUseNewBase(const SCEV *NewBase, - Instruction *NewBasePt, - SCEVExpander &Rewriter, Loop *L, Pass *P, - SmallVectorImpl<WeakVH> &DeadInsts, - ScalarEvolution *SE) { - if (!isa<PHINode>(Inst)) { - // By default, insert code at the user instruction. - BasicBlock::iterator InsertPt = Inst; - - // However, if the Operand is itself an instruction, the (potentially - // complex) inserted code may be shared by many users. Because of this, we - // want to emit code for the computation of the operand right before its old - // computation. This is usually safe, because we obviously used to use the - // computation when it was computed in its current block. However, in some - // cases (e.g. use of a post-incremented induction variable) the NewBase - // value will be pinned to live somewhere after the original computation. - // In this case, we have to back off. - // - // If this is a use outside the loop (which means after, since it is based - // on a loop indvar) we use the post-incremented value, so that we don't - // artificially make the preinc value live out the bottom of the loop. - if (!isUseOfPostIncrementedValue && L->contains(Inst)) { - if (NewBasePt && isa<PHINode>(OperandValToReplace)) { - InsertPt = NewBasePt; - ++InsertPt; - } else if (Instruction *OpInst - = dyn_cast<Instruction>(OperandValToReplace)) { - InsertPt = OpInst; - while (isa<PHINode>(InsertPt)) ++InsertPt; - } - } - Value *NewVal = InsertCodeForBaseAtPosition(NewBase, - OperandValToReplace->getType(), - Rewriter, InsertPt, SE); - // Replace the use of the operand Value with the new Phi we just created. 
- Inst->replaceUsesOfWith(OperandValToReplace, NewVal); - - DEBUG(dbgs() << " Replacing with "); - DEBUG(WriteAsOperand(dbgs(), NewVal, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); - return; - } + if (I == 0 || !isInstructionTriviallyDead(I)) + continue; - // PHI nodes are more complex. We have to insert one copy of the NewBase+Imm - // expression into each operand block that uses it. Note that PHI nodes can - // have multiple entries for the same predecessor. We use a map to make sure - // that a PHI node only has a single Value* for each predecessor (which also - // prevents us from inserting duplicate code in some blocks). - DenseMap<BasicBlock*, Value*> InsertedCode; - PHINode *PN = cast<PHINode>(Inst); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - if (PN->getIncomingValue(i) == OperandValToReplace) { - // If the original expression is outside the loop, put the replacement - // code in the same place as the original expression, - // which need not be an immediate predecessor of this PHI. This way we - // need only one copy of it even if it is referenced multiple times in - // the PHI. We don't do this when the original expression is inside the - // loop because multiple copies sometimes do useful sinking of code in - // that case(?). - Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); - BasicBlock *PHIPred = PN->getIncomingBlock(i); - if (L->contains(OldLoc)) { - // If this is a critical edge, split the edge so that we do not insert - // the code on all predecessor/successor paths. We do this unless this - // is the canonical backedge for this loop, as this can make some - // inserted code be in an illegal position. - if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 && - !isa<IndirectBrInst>(PHIPred->getTerminator()) && - (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) { - - // First step, split the critical edge. - BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), - P, false); - - // Next step: move the basic block. In particular, if the PHI node - // is outside of the loop, and PredTI is in the loop, we want to - // move the block to be immediately before the PHI block, not - // immediately after PredTI. - if (L->contains(PHIPred) && !L->contains(PN)) - NewBB->moveBefore(PN->getParent()); - - // Splitting the edge can reduce the number of PHI entries we have. - e = PN->getNumIncomingValues(); - PHIPred = NewBB; - i = PN->getBasicBlockIndex(PHIPred); - } - } - Value *&Code = InsertedCode[PHIPred]; - if (!Code) { - // Insert the code into the end of the predecessor block. - Instruction *InsertPt = (L->contains(OldLoc)) ? - PHIPred->getTerminator() : - OldLoc->getParent()->getTerminator(); - Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(), - Rewriter, InsertPt, SE); - - DEBUG(dbgs() << " Changing PHI use to "); - DEBUG(WriteAsOperand(dbgs(), Code, /*PrintType=*/false)); - DEBUG(dbgs() << ", which has value " << *NewBase << " plus IMM " - << *Imm << "\n"); + for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) + if (Instruction *U = dyn_cast<Instruction>(*OI)) { + *OI = 0; + if (U->use_empty()) + DeadInsts.push_back(U); } - // Replace the use of the operand Value with the new Phi we just created. - PN->setIncomingValue(i, Code); - Rewriter.clear(); - } + I->eraseFromParent(); + Changed = true; } - // PHI node might have become a constant value after SplitCriticalEdge. 
- DeadInsts.push_back(Inst); + return Changed; } +namespace { -/// fitsInAddressMode - Return true if V can be subsumed within an addressing -/// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV *V, const Type *AccessTy, - const TargetLowering *TLI, bool HasBaseReg) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { - int64_t VC = SC->getValue()->getSExtValue(); - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseOffs = VC; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Defaults to PPC. PPC allows a sign-extended 16-bit immediate field. - return (VC > -(1 << 16) && VC < (1 << 16)-1); - } - } - - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) - if (GlobalValue *GV = dyn_cast<GlobalValue>(SU->getValue())) { - if (TLI) { - TargetLowering::AddrMode AM; - AM.BaseGV = GV; - AM.HasBaseReg = HasBaseReg; - return TLI->isLegalAddressingMode(AM, AccessTy); - } else { - // Default: assume global addresses are not legal. - } - } +/// Cost - This class is used to measure and compare candidate formulae. +class Cost { + /// TODO: Some of these could be merged. Also, a lexical ordering + /// isn't always optimal. + unsigned NumRegs; + unsigned AddRecCost; + unsigned NumIVMuls; + unsigned NumBaseAdds; + unsigned ImmCost; + unsigned SetupCost; + +public: + Cost() + : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), + SetupCost(0) {} + + unsigned getNumRegs() const { return NumRegs; } + + bool operator<(const Cost &Other) const; + + void Loose(); + + void RateFormula(const Formula &F, + SmallPtrSet<const SCEV *, 16> &Regs, + const DenseSet<const SCEV *> &VisitedRegs, + const Loop *L, + const SmallVectorImpl<int64_t> &Offsets, + ScalarEvolution &SE, DominatorTree &DT); + + void print(raw_ostream &OS) const; + void dump() const; + +private: + void RateRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT); + void RatePrimaryRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT); +}; - return false; } -/// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are -/// loop varying to the Imm operand. -static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm, - Loop *L, ScalarEvolution *SE) { - if (Val->isLoopInvariant(L)) return; // Nothing to do. - - if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV *, 4> NewOps; - NewOps.reserve(SAE->getNumOperands()); +/// RateRegister - Tally up interesting quantities from the given register. +void Cost::RateRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { + if (AR->getLoop() == L) + AddRecCost += 1; /// TODO: This should be a function of the stride. + + // If this is an addrec for a loop that's already been visited by LSR, + // don't second-guess its addrec phi nodes. LSR isn't currently smart + // enough to reason about more than one loop at a time. Consider these + // registers free and leave them alone. 
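The rewritten DeleteTriviallyDeadInstructions above is a standard worklist algorithm: erasing one dead instruction may make its operands dead in turn, so they are pushed onto the list and revisited. A self-contained toy model of that pattern (ToyInst and its use counts are invented stand-ins for LLVM's use lists, not part of the patch):

#include <cassert>
#include <vector>

// Toy "instruction": a use count and the operands it keeps alive.
struct ToyInst {
  unsigned UseCount;
  std::vector<ToyInst*> Operands;
  bool Deleted;
  ToyInst() : UseCount(0), Deleted(false) {}
};

// Worklist deletion: whenever an instruction with no uses is erased, drop one
// use from each of its operands and revisit any operand that just became dead.
static bool deleteTriviallyDead(std::vector<ToyInst*> &Dead) {
  bool Changed = false;
  while (!Dead.empty()) {
    ToyInst *I = Dead.back();
    Dead.pop_back();
    if (!I || I->Deleted || I->UseCount != 0)
      continue;
    for (size_t i = 0, e = I->Operands.size(); i != e; ++i) {
      ToyInst *Op = I->Operands[i];
      if (Op && --Op->UseCount == 0)
        Dead.push_back(Op);
    }
    I->Deleted = true;
    Changed = true;
  }
  return Changed;
}

int main() {
  ToyInst A, B;                 // B uses A; nothing uses B
  A.UseCount = 1;
  B.Operands.push_back(&A);
  std::vector<ToyInst*> Dead(1, &B);
  bool Changed = deleteTriviallyDead(Dead);
  assert(Changed && A.Deleted && B.Deleted);
  return 0;
}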
+ else if (L->contains(AR->getLoop()) || + (!AR->getLoop()->contains(L) && + DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) { + for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); + PHINode *PN = dyn_cast<PHINode>(I); ++I) + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(AR->getType())) && + SE.getSCEV(PN) == AR) + return; - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) - if (!SAE->getOperand(i)->isLoopInvariant(L)) { - // If this is a loop-variant expression, it must stay in the immediate - // field of the expression. - Imm = SE->getAddExpr(Imm, SAE->getOperand(i)); - } else { - NewOps.push_back(SAE->getOperand(i)); - } + // If this isn't one of the addrecs that the loop already has, it + // would require a costly new phi and add. TODO: This isn't + // precisely modeled right now. + ++NumBaseAdds; + if (!Regs.count(AR->getStart())) + RateRegister(AR->getStart(), Regs, L, SE, DT); + } - if (NewOps.empty()) - Val = SE->getIntegerSCEV(0, Val->getType()); - else - Val = SE->getAddExpr(NewOps); - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { - // Try to pull immediates out of the start value of nested addrec's. - const SCEV *Start = SARE->getStart(); - MoveLoopVariantsToImmediateField(Start, Imm, L, SE); - - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Start; - Val = SE->getAddRecExpr(Ops, SARE->getLoop()); - } else { - // Otherwise, all of Val is variant, move the whole thing over. - Imm = SE->getAddExpr(Imm, Val); - Val = SE->getIntegerSCEV(0, Val->getType()); + // Add the step value register, if it needs one. + // TODO: The non-affine case isn't precisely modeled here. + if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) + if (!Regs.count(AR->getStart())) + RateRegister(AR->getOperand(1), Regs, L, SE, DT); } + ++NumRegs; + + // Rough heuristic; favor registers which don't require extra setup + // instructions in the preheader. + if (!isa<SCEVUnknown>(Reg) && + !isa<SCEVConstant>(Reg) && + !(isa<SCEVAddRecExpr>(Reg) && + (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || + isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) + ++SetupCost; } +/// RatePrimaryRegister - Record this register in the set. If we haven't seen it +/// before, rate it. +void Cost::RatePrimaryRegister(const SCEV *Reg, + SmallPtrSet<const SCEV *, 16> &Regs, + const Loop *L, + ScalarEvolution &SE, DominatorTree &DT) { + if (Regs.insert(Reg)) + RateRegister(Reg, Regs, L, SE, DT); +} -/// MoveImmediateValues - Look at Val, and pull out any additions of constants -/// that can fit into the immediate field of instructions in the target. -/// Accumulate these immediate values into the Imm value. -static void MoveImmediateValues(const TargetLowering *TLI, - const Type *AccessTy, - const SCEV *&Val, const SCEV *&Imm, - bool isAddress, Loop *L, - ScalarEvolution *SE) { - if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { - SmallVector<const SCEV *, 4> NewOps; - NewOps.reserve(SAE->getNumOperands()); - - for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { - const SCEV *NewOp = SAE->getOperand(i); - MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE); - - if (!NewOp->isLoopInvariant(L)) { - // If this is a loop-variant expression, it must stay in the immediate - // field of the expression. 
- Imm = SE->getAddExpr(Imm, NewOp); - } else { - NewOps.push_back(NewOp); - } - } - - if (NewOps.empty()) - Val = SE->getIntegerSCEV(0, Val->getType()); - else - Val = SE->getAddExpr(NewOps); - return; - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) { - // Try to pull immediates out of the start value of nested addrec's. - const SCEV *Start = SARE->getStart(); - MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE); - - if (Start != SARE->getStart()) { - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Start; - Val = SE->getAddRecExpr(Ops, SARE->getLoop()); +void Cost::RateFormula(const Formula &F, + SmallPtrSet<const SCEV *, 16> &Regs, + const DenseSet<const SCEV *> &VisitedRegs, + const Loop *L, + const SmallVectorImpl<int64_t> &Offsets, + ScalarEvolution &SE, DominatorTree &DT) { + // Tally up the registers. + if (const SCEV *ScaledReg = F.ScaledReg) { + if (VisitedRegs.count(ScaledReg)) { + Loose(); + return; } - return; - } else if (const SCEVMulExpr *SME = dyn_cast<SCEVMulExpr>(Val)) { - // Transform "8 * (4 + v)" -> "32 + 8*V" if "32" fits in the immed field. - if (isAddress && - fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) && - SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { - - const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType()); - const SCEV *NewOp = SME->getOperand(1); - MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE); - - // If we extracted something out of the subexpressions, see if we can - // simplify this! - if (NewOp != SME->getOperand(1)) { - // Scale SubImm up by "8". If the result is a target constant, we are - // good. - SubImm = SE->getMulExpr(SubImm, SME->getOperand(0)); - if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) { - // Accumulate the immediate. - Imm = SE->getAddExpr(Imm, SubImm); - - // Update what is left of 'Val'. - Val = SE->getMulExpr(SME->getOperand(0), NewOp); - return; - } - } + RatePrimaryRegister(ScaledReg, Regs, L, SE, DT); + } + for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), + E = F.BaseRegs.end(); I != E; ++I) { + const SCEV *BaseReg = *I; + if (VisitedRegs.count(BaseReg)) { + Loose(); + return; } + RatePrimaryRegister(BaseReg, Regs, L, SE, DT); + + NumIVMuls += isa<SCEVMulExpr>(BaseReg) && + BaseReg->hasComputableLoopEvolution(L); } - // Loop-variant expressions must stay in the immediate field of the - // expression. - if ((isAddress && fitsInAddressMode(Val, AccessTy, TLI, false)) || - !Val->isLoopInvariant(L)) { - Imm = SE->getAddExpr(Imm, Val); - Val = SE->getIntegerSCEV(0, Val->getType()); - return; + if (F.BaseRegs.size() > 1) + NumBaseAdds += F.BaseRegs.size() - 1; + + // Tally up the non-zero immediates. + for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), + E = Offsets.end(); I != E; ++I) { + int64_t Offset = (uint64_t)*I + F.AM.BaseOffs; + if (F.AM.BaseGV) + ImmCost += 64; // Handle symbolic values conservatively. + // TODO: This should probably be the pointer size. + else if (Offset != 0) + ImmCost += APInt(64, Offset, true).getMinSignedBits(); } +} - // Otherwise, no immediates to move. +/// Loose - Set this cost to a loosing value. 
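Cost::operator<, defined a little further below, compares candidate solutions lexicographically over the fields declared in the Cost class above: register count first, then addrec cost, IV multiplies, base adds, immediate cost, and finally setup cost. An equivalent standalone formulation of that ordering using C++11 std::tie (illustrative only; ToyCost is an invented name):

#include <tuple>

// Illustrative mirror of the Cost fields and their comparison order. Lower
// compares smaller, so a solution with fewer registers wins before any of
// the later tie-breakers are consulted.
struct ToyCost {
  unsigned NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost, SetupCost;

  bool operator<(const ToyCost &Other) const {
    return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds,
                    ImmCost, SetupCost) <
           std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
                    Other.NumBaseAdds, Other.ImmCost, Other.SetupCost);
  }
};

int main() {
  ToyCost A = { 2, 9, 9, 9, 9, 9 };
  ToyCost B = { 3, 0, 0, 0, 0, 0 };
  return (A < B) ? 0 : 1;   // fewer registers wins regardless of later fields
}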
+void Cost::Loose() { + NumRegs = ~0u; + AddRecCost = ~0u; + NumIVMuls = ~0u; + NumBaseAdds = ~0u; + ImmCost = ~0u; + SetupCost = ~0u; } -static void MoveImmediateValues(const TargetLowering *TLI, - Instruction *User, - const SCEV *&Val, const SCEV *&Imm, - bool isAddress, Loop *L, - ScalarEvolution *SE) { - const Type *AccessTy = getAccessType(User); - MoveImmediateValues(TLI, AccessTy, Val, Imm, isAddress, L, SE); +/// operator< - Choose the lower cost. +bool Cost::operator<(const Cost &Other) const { + if (NumRegs != Other.NumRegs) + return NumRegs < Other.NumRegs; + if (AddRecCost != Other.AddRecCost) + return AddRecCost < Other.AddRecCost; + if (NumIVMuls != Other.NumIVMuls) + return NumIVMuls < Other.NumIVMuls; + if (NumBaseAdds != Other.NumBaseAdds) + return NumBaseAdds < Other.NumBaseAdds; + if (ImmCost != Other.ImmCost) + return ImmCost < Other.ImmCost; + if (SetupCost != Other.SetupCost) + return SetupCost < Other.SetupCost; + return false; } -/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are -/// added together. This is used to reassociate common addition subexprs -/// together for maximal sharing when rewriting bases. -static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs, - const SCEV *Expr, - ScalarEvolution *SE) { - if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) { - for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j) - SeparateSubExprs(SubExprs, AE->getOperand(j), SE); - } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) { - const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType()); - if (SARE->getOperand(0) == Zero) { - SubExprs.push_back(Expr); - } else { - // Compute the addrec with zero as its base. - SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end()); - Ops[0] = Zero; // Start with zero base. - SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); +void Cost::print(raw_ostream &OS) const { + OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s"); + if (AddRecCost != 0) + OS << ", with addrec cost " << AddRecCost; + if (NumIVMuls != 0) + OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s"); + if (NumBaseAdds != 0) + OS << ", plus " << NumBaseAdds << " base add" + << (NumBaseAdds == 1 ? "" : "s"); + if (ImmCost != 0) + OS << ", plus " << ImmCost << " imm cost"; + if (SetupCost != 0) + OS << ", plus " << SetupCost << " setup cost"; +} +void Cost::dump() const { + print(errs()); errs() << '\n'; +} - SeparateSubExprs(SubExprs, SARE->getOperand(0), SE); - } - } else if (!Expr->isZero()) { - // Do not add zero. - SubExprs.push_back(Expr); - } -} - -// This is logically local to the following function, but C++ says we have -// to make it file scope. -struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; - -/// RemoveCommonExpressionsFromUseBases - Look through all of the Bases of all -/// the Uses, removing any common subexpressions, except that if all such -/// subexpressions can be folded into an addressing mode for all uses inside -/// the loop (this case is referred to as "free" in comments herein) we do -/// not remove anything. This looks for things like (a+b+c) and -/// (a+c+d) and computes the common (a+c) subexpression. The common expression -/// is *removed* from the Bases and returned. -static const SCEV * -RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses, - ScalarEvolution *SE, Loop *L, - const TargetLowering *TLI) { - unsigned NumUses = Uses.size(); - - // Only one use? 
This is a very common case, so we handle it specially and - // cheaply. - const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); - const SCEV *Result = Zero; - const SCEV *FreeResult = Zero; - if (NumUses == 1) { - // If the use is inside the loop, use its base, regardless of what it is: - // it is clearly shared across all the IV's. If the use is outside the loop - // (which means after it) we don't want to factor anything *into* the loop, - // so just use 0 as the base. - if (L->contains(Uses[0].Inst)) - std::swap(Result, Uses[0].Base); - return Result; - } +namespace { - // To find common subexpressions, count how many of Uses use each expression. - // If any subexpressions are used Uses.size() times, they are common. - // Also track whether all uses of each expression can be moved into an - // an addressing mode "for free"; such expressions are left within the loop. - // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; - std::map<const SCEV *, SubExprUseData> SubExpressionUseData; - - // UniqueSubExprs - Keep track of all of the subexpressions we see in the - // order we see them. - SmallVector<const SCEV *, 16> UniqueSubExprs; - - SmallVector<const SCEV *, 16> SubExprs; - unsigned NumUsesInsideLoop = 0; - for (unsigned i = 0; i != NumUses; ++i) { - // If the user is outside the loop, just ignore it for base computation. - // Since the user is outside the loop, it must be *after* the loop (if it - // were before, it could not be based on the loop IV). We don't want users - // after the loop to affect base computation of values *inside* the loop, - // because we can always add their offsets to the result IV after the loop - // is done, ensuring we get good code inside the loop. - if (!L->contains(Uses[i].Inst)) - continue; - NumUsesInsideLoop++; +/// LSRFixup - An operand value in an instruction which is to be replaced +/// with some equivalent, possibly strength-reduced, replacement. +struct LSRFixup { + /// UserInst - The instruction which will be updated. + Instruction *UserInst; - // If the base is zero (which is common), return zero now, there are no - // CSEs we can find. - if (Uses[i].Base == Zero) return Zero; + /// OperandValToReplace - The operand of the instruction which will + /// be replaced. The operand may be used more than once; every instance + /// will be replaced. + Value *OperandValToReplace; - // If this use is as an address we may be able to put CSEs in the addressing - // mode rather than hoisting them. - bool isAddrUse = isAddressUse(Uses[i].Inst, Uses[i].OperandValToReplace); - // We may need the AccessTy below, but only when isAddrUse, so compute it - // only in that case. - const Type *AccessTy = 0; - if (isAddrUse) - AccessTy = getAccessType(Uses[i].Inst); - - // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base, SE); - // Add one to SubExpressionUseData.Count for each subexpr present, and - // if the subexpr is not a valid immediate within an addressing mode use, - // set SubExpressionUseData.notAllUsesAreFree. We definitely want to - // hoist these out of the loop (if they are common to all uses). - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - if (++SubExpressionUseData[SubExprs[j]].Count == 1) - UniqueSubExprs.push_back(SubExprs[j]); - if (!isAddrUse || !fitsInAddressMode(SubExprs[j], AccessTy, TLI, false)) - SubExpressionUseData[SubExprs[j]].notAllUsesAreFree = true; - } - SubExprs.clear(); - } - - // Now that we know how many times each is used, build Result. 
Iterate over - // UniqueSubexprs so that we have a stable ordering. - for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { - std::map<const SCEV *, SubExprUseData>::iterator I = - SubExpressionUseData.find(UniqueSubExprs[i]); - assert(I != SubExpressionUseData.end() && "Entry not found?"); - if (I->second.Count == NumUsesInsideLoop) { // Found CSE! - if (I->second.notAllUsesAreFree) - Result = SE->getAddExpr(Result, I->first); - else - FreeResult = SE->getAddExpr(FreeResult, I->first); - } else - // Remove non-cse's from SubExpressionUseData. - SubExpressionUseData.erase(I); - } - - if (FreeResult != Zero) { - // We have some subexpressions that can be subsumed into addressing - // modes in every use inside the loop. However, it's possible that - // there are so many of them that the combined FreeResult cannot - // be subsumed, or that the target cannot handle both a FreeResult - // and a Result in the same instruction (for example because it would - // require too many registers). Check this. - for (unsigned i=0; i<NumUses; ++i) { - if (!L->contains(Uses[i].Inst)) - continue; - // We know this is an addressing mode use; if there are any uses that - // are not, FreeResult would be Zero. - const Type *AccessTy = getAccessType(Uses[i].Inst); - if (!fitsInAddressMode(FreeResult, AccessTy, TLI, Result!=Zero)) { - // FIXME: could split up FreeResult into pieces here, some hoisted - // and some not. There is no obvious advantage to this. - Result = SE->getAddExpr(Result, FreeResult); - FreeResult = Zero; - break; - } - } - } + /// PostIncLoop - If this user is to use the post-incremented value of an + /// induction variable, this variable is non-null and holds the loop + /// associated with the induction variable. + const Loop *PostIncLoop; - // If we found no CSE's, return now. - if (Result == Zero) return Result; + /// LUIdx - The index of the LSRUse describing the expression which + /// this fixup needs, minus an offset (below). + size_t LUIdx; - // If we still have a FreeResult, remove its subexpressions from - // SubExpressionUseData. This means they will remain in the use Bases. - if (FreeResult != Zero) { - SeparateSubExprs(SubExprs, FreeResult, SE); - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { - std::map<const SCEV *, SubExprUseData>::iterator I = - SubExpressionUseData.find(SubExprs[j]); - SubExpressionUseData.erase(I); - } - SubExprs.clear(); - } + /// Offset - A constant offset to be added to the LSRUse expression. + /// This allows multiple fixups to share the same LSRUse with different + /// offsets, for example in an unrolled loop. + int64_t Offset; - // Otherwise, remove all of the CSE's we found from each of the base values. - for (unsigned i = 0; i != NumUses; ++i) { - // Uses outside the loop don't necessarily include the common base, but - // the final IV value coming into those uses does. Instead of trying to - // remove the pieces of the common base, which might not be there, - // subtract off the base to compensate for this. - if (!L->contains(Uses[i].Inst)) { - Uses[i].Base = SE->getMinusSCEV(Uses[i].Base, Result); - continue; - } + LSRFixup(); - // Split the expression into subexprs. - SeparateSubExprs(SubExprs, Uses[i].Base, SE); + void print(raw_ostream &OS) const; + void dump() const; +}; - // Remove any common subexpressions. - for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) - if (SubExpressionUseData.count(SubExprs[j])) { - SubExprs.erase(SubExprs.begin()+j); - --j; --e; - } +} - // Finally, add the non-shared expressions together. 
- if (SubExprs.empty()) - Uses[i].Base = Zero; - else - Uses[i].Base = SE->getAddExpr(SubExprs); - SubExprs.clear(); +LSRFixup::LSRFixup() + : UserInst(0), OperandValToReplace(0), PostIncLoop(0), + LUIdx(~size_t(0)), Offset(0) {} + +void LSRFixup::print(raw_ostream &OS) const { + OS << "UserInst="; + // Store is common and interesting enough to be worth special-casing. + if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) { + OS << "store "; + WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false); + } else if (UserInst->getType()->isVoidTy()) + OS << UserInst->getOpcodeName(); + else + WriteAsOperand(OS, UserInst, /*PrintType=*/false); + + OS << ", OperandValToReplace="; + WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); + + if (PostIncLoop) { + OS << ", PostIncLoop="; + WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); } - return Result; -} + if (LUIdx != ~size_t(0)) + OS << ", LUIdx=" << LUIdx; -/// ValidScale - Check whether the given Scale is valid for all loads and -/// stores in UsersToProcess. -/// -bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale, - const std::vector<BasedUser>& UsersToProcess) { - if (!TLI) - return true; + if (Offset != 0) + OS << ", Offset=" << Offset; +} - for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) { - // If this is a load or other access, pass the type of the access in. - const Type *AccessTy = - Type::getVoidTy(UsersToProcess[i].Inst->getContext()); - if (isAddressUse(UsersToProcess[i].Inst, - UsersToProcess[i].OperandValToReplace)) - AccessTy = getAccessType(UsersToProcess[i].Inst); - else if (isa<PHINode>(UsersToProcess[i].Inst)) - continue; +void LSRFixup::dump() const { + print(errs()); errs() << '\n'; +} - TargetLowering::AddrMode AM; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) - AM.BaseOffs = SC->getValue()->getSExtValue(); - AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); - AM.Scale = Scale; +namespace { - // If load[imm+r*scale] is illegal, bail out. - if (!TLI->isLegalAddressingMode(AM, AccessTy)) - return false; +/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding +/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. +struct UniquifierDenseMapInfo { + static SmallVector<const SCEV *, 2> getEmptyKey() { + SmallVector<const SCEV *, 2> V; + V.push_back(reinterpret_cast<const SCEV *>(-1)); + return V; } - return true; -} - -/// ValidOffset - Check whether the given Offset is valid for all loads and -/// stores in UsersToProcess. -/// -bool LoopStrengthReduce::ValidOffset(bool HasBaseReg, - int64_t Offset, - int64_t Scale, - const std::vector<BasedUser>& UsersToProcess) { - if (!TLI) - return true; - for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) { - // If this is a load or other access, pass the type of the access in. 
- const Type *AccessTy = - Type::getVoidTy(UsersToProcess[i].Inst->getContext()); - if (isAddressUse(UsersToProcess[i].Inst, - UsersToProcess[i].OperandValToReplace)) - AccessTy = getAccessType(UsersToProcess[i].Inst); - else if (isa<PHINode>(UsersToProcess[i].Inst)) - continue; + static SmallVector<const SCEV *, 2> getTombstoneKey() { + SmallVector<const SCEV *, 2> V; + V.push_back(reinterpret_cast<const SCEV *>(-2)); + return V; + } - TargetLowering::AddrMode AM; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm)) - AM.BaseOffs = SC->getValue()->getSExtValue(); - AM.BaseOffs = (uint64_t)AM.BaseOffs + (uint64_t)Offset; - AM.HasBaseReg = HasBaseReg || !UsersToProcess[i].Base->isZero(); - AM.Scale = Scale; + static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) { + unsigned Result = 0; + for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(), + E = V.end(); I != E; ++I) + Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I); + return Result; + } - // If load[imm+r*scale] is illegal, bail out. - if (!TLI->isLegalAddressingMode(AM, AccessTy)) - return false; + static bool isEqual(const SmallVector<const SCEV *, 2> &LHS, + const SmallVector<const SCEV *, 2> &RHS) { + return LHS == RHS; } - return true; -} +}; + +/// LSRUse - This class holds the state that LSR keeps for each use in +/// IVUsers, as well as uses invented by LSR itself. It includes information +/// about what kinds of things can be folded into the user, information about +/// the user itself, and information about how the use may be satisfied. +/// TODO: Represent multiple users of the same expression in common? +class LSRUse { + DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier; + +public: + /// KindType - An enum for a kind of use, indicating what types of + /// scaled and immediate operands it might support. + enum KindType { + Basic, ///< A normal use, with no folding. + Special, ///< A special case of basic, allowing -1 scales. + Address, ///< An address use; folding according to TargetLowering + ICmpZero ///< An equality icmp with both operands folded into one. + // TODO: Add a generic icmp too? + }; -/// RequiresTypeConversion - Returns true if converting Ty1 to Ty2 is not -/// a nop. -bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1, - const Type *Ty2) { - if (Ty1 == Ty2) - return false; - Ty1 = SE->getEffectiveSCEVType(Ty1); - Ty2 = SE->getEffectiveSCEVType(Ty2); - if (Ty1 == Ty2) - return false; - if (Ty1->canLosslesslyBitCastTo(Ty2)) - return false; - if (TLI && TLI->isTruncateFree(Ty1, Ty2)) - return false; - return true; -} + KindType Kind; + const Type *AccessTy; -/// CheckForIVReuse - Returns the multiple if the stride is the multiple -/// of a previous stride and it is a legal value for the target addressing -/// mode scale component and optional base reg. This allows the users of -/// this stride to be rewritten as prev iv * factor. It returns 0 if no -/// reuse is possible. Factors can be negative on same targets, e.g. ARM. -/// -/// If all uses are outside the loop, we don't require that all multiplies -/// be folded into the addressing mode, nor even that the factor be constant; -/// a multiply (executed once) outside the loop is better than another IV -/// within. Well, usually. 
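The UniquifierDenseMapInfo above exists so that LSRUse::InsertFormula, further below, can cheaply reject a formula whose set of registers has already been seen: the key is the sorted vector of register pointers, and the hash XOR-combines the per-element hashes. A rough standalone analogue using the standard library (invented names; DenseMap's empty/tombstone sentinel mechanics are omitted):

#include <algorithm>
#include <cassert>
#include <functional>
#include <unordered_set>
#include <vector>

typedef std::vector<const void*> RegKey;  // stand-in for SmallVector<const SCEV*, 2>

// XOR-combine the element hashes, echoing UniquifierDenseMapInfo::getHashValue.
struct RegKeyHash {
  size_t operator()(const RegKey &K) const {
    size_t H = 0;
    for (size_t i = 0, e = K.size(); i != e; ++i)
      H ^= std::hash<const void*>()(K[i]);
    return H;
  }
};

// Returns true the first time a given register set is seen, echoing the
// uniquifying step at the top of LSRUse::InsertFormula.
static bool insertUnique(std::unordered_set<RegKey, RegKeyHash> &Seen,
                         RegKey Key) {
  // Sort so that the key is order-insensitive, as in the patch.
  std::sort(Key.begin(), Key.end(), std::less<const void*>());
  return Seen.insert(Key).second;
}

int main() {
  int A = 0, B = 0;                       // two dummy "registers"
  std::unordered_set<RegKey, RegKeyHash> Seen;
  RegKey K1; K1.push_back(&A); K1.push_back(&B);
  RegKey K2; K2.push_back(&B); K2.push_back(&A);   // same set, different order
  assert(insertUnique(Seen, K1));
  assert(!insertUnique(Seen, K2));        // rejected as a duplicate formula
  return 0;
}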
-const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, - bool AllUsesAreAddresses, - bool AllUsesAreOutsideLoop, - const SCEV *Stride, - IVExpr &IV, const Type *Ty, - const std::vector<BasedUser>& UsersToProcess) { - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) { - int64_t SInt = SC->getValue()->getSExtValue(); - for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) - continue; - // The other stride has no uses, don't reuse it. - std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI = - IU->IVUsesByStride.find(IU->StrideOrder[NewStride]); - if (UI->second->Users.empty()) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SI->first != Stride && - (unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0)) - continue; - int64_t Scale = SInt / SSInt; - // Check that this stride is valid for all the types used for loads and - // stores; if it can be used for some and not others, we might as well use - // the original stride everywhere, since we have to create the IV for it - // anyway. If the scale is 1, then we don't need to worry about folding - // multiplications. - if (Scale == 1 || - (AllUsesAreAddresses && - ValidScale(HasBaseReg, Scale, UsersToProcess))) { - // Prefer to reuse an IV with a base of zero. - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Only reuse previous IV if it would not require a type conversion - // and if the base difference can be folded. - if (II->Base->isZero() && - !RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return SE->getIntegerSCEV(Scale, Stride->getType()); - } - // Otherwise, settle for an IV with a foldable base. - if (AllUsesAreAddresses) - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Only reuse previous IV if it would not require a type conversion - // and if the base difference can be folded. - if (SE->getEffectiveSCEVType(II->Base->getType()) == - SE->getEffectiveSCEVType(Ty) && - isa<SCEVConstant>(II->Base)) { - int64_t Base = - cast<SCEVConstant>(II->Base)->getValue()->getSExtValue(); - if (Base > INT32_MIN && Base <= INT32_MAX && - ValidOffset(HasBaseReg, -Base * Scale, - Scale, UsersToProcess)) { - IV = *II; - return SE->getIntegerSCEV(Scale, Stride->getType()); - } - } - } - } - } else if (AllUsesAreOutsideLoop) { - // Accept nonconstant strides here; it is really really right to substitute - // an existing IV if we can. - for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first)) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SI->first != Stride && SSInt != 1) - continue; - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Accept nonzero base here. - // Only reuse previous IV if it would not require a type conversion. - if (!RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return Stride; - } - } - // Special case, old IV is -1*x and this one is x. Can treat this one as - // -1*old. 
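For contrast with the new formula-based search, the removed CheckForIVReuse above let a use of stride S piggyback on an existing IV of stride S' whenever S' divides S exactly and the resulting quotient is a legal addressing-mode scale. A hypothetical condensation of just the divisibility part of that test (canReuseStride is an invented name, and the removed code additionally validates the scale against the target):

#include <cassert>
#include <cstdint>

// Condensed form of the old reuse test: a stride SInt can piggyback on an
// existing IV of stride SSInt only if SSInt divides it exactly; the nonzero
// quotient becomes the addressing-mode scale (e.g. the 4 in [r1 + 4*r2]).
static bool canReuseStride(int64_t SInt, int64_t SSInt, int64_t &Scale) {
  if (SSInt == 0 || SInt % SSInt != 0)
    return false;
  Scale = SInt / SSInt;
  return Scale != 0;
}

int main() {
  int64_t Scale = 0;
  assert(canReuseStride(8, 2, Scale) && Scale == 4);  // stride 8 reuses a stride-2 IV
  assert(!canReuseStride(6, 4, Scale));               // 4 does not divide 6
  return 0;
}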
- for (unsigned NewStride = 0, e = IU->StrideOrder.size(); - NewStride != e; ++NewStride) { - std::map<const SCEV *, IVsOfOneStride>::iterator SI = - IVsByStride.find(IU->StrideOrder[NewStride]); - if (SI == IVsByStride.end()) - continue; - if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first)) - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0))) - if (Stride == ME->getOperand(1) && - SC->getValue()->getSExtValue() == -1LL) - for (std::vector<IVExpr>::iterator II = SI->second.IVs.begin(), - IE = SI->second.IVs.end(); II != IE; ++II) - // Accept nonzero base here. - // Only reuse previous IV if it would not require type conversion. - if (!RequiresTypeConversion(II->Base->getType(), Ty)) { - IV = *II; - return SE->getIntegerSCEV(-1LL, Stride->getType()); - } - } - } - return SE->getIntegerSCEV(0, Stride->getType()); -} - -/// PartitionByIsUseOfPostIncrementedValue - Simple boolean predicate that -/// returns true if Val's isUseOfPostIncrementedValue is true. -static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) { - return Val.isUseOfPostIncrementedValue; -} - -/// isNonConstantNegative - Return true if the specified scev is negated, but -/// not a constant. -static bool isNonConstantNegative(const SCEV *Expr) { - const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr); - if (!Mul) return false; - - // If there is a constant factor, it will be first. - const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); - if (!SC) return false; - - // Return true if the value is negative, this matches things like (-42 * V). - return SC->getValue()->getValue().isNegative(); -} - -/// CollectIVUsers - Transform our list of users and offsets to a bit more -/// complex table. In this new vector, each 'BasedUser' contains 'Base', the -/// base of the strided accesses, as well as the old information from Uses. We -/// progressively move information from the Base field to the Imm field, until -/// we eventually have the full access expression to rewrite the use. -const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L, - bool &AllUsesAreAddresses, - bool &AllUsesAreOutsideLoop, - std::vector<BasedUser> &UsersToProcess) { - // FIXME: Generalize to non-affine IV's. - if (!Stride->isLoopInvariant(L)) - return SE->getIntegerSCEV(0, Stride->getType()); - - UsersToProcess.reserve(Uses.Users.size()); - for (ilist<IVStrideUse>::iterator I = Uses.Users.begin(), - E = Uses.Users.end(); I != E; ++I) { - UsersToProcess.push_back(BasedUser(*I, SE)); - - // Move any loop variant operands from the offset field to the immediate - // field of the use, so that we don't try to use something before it is - // computed. - MoveLoopVariantsToImmediateField(UsersToProcess.back().Base, - UsersToProcess.back().Imm, L, SE); - assert(UsersToProcess.back().Base->isLoopInvariant(L) && - "Base value is not loop invariant!"); - } - - // We now have a whole bunch of uses of like-strided induction variables, but - // they might all have different bases. We want to emit one PHI node for this - // stride which we fold as many common expressions (between the IVs) into as - // possible. Start by identifying the common expressions in the base values - // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find - // "A+B"), emit it to the preheader, then remove the expression from the - // UsersToProcess base values. 
- const SCEV *CommonExprs = - RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI); - - // Next, figure out what we can represent in the immediate fields of - // instructions. If we can represent anything there, move it to the imm - // fields of the BasedUsers. We do this so that it increases the commonality - // of the remaining uses. - unsigned NumPHI = 0; - bool HasAddress = false; - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { - // If the user is not in the current loop, this means it is using the exit - // value of the IV. Do not put anything in the base, make sure it's all in - // the immediate field to allow as much factoring as possible. - if (!L->contains(UsersToProcess[i].Inst)) { - UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, - UsersToProcess[i].Base); - UsersToProcess[i].Base = - SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType()); - } else { - // Not all uses are outside the loop. - AllUsesAreOutsideLoop = false; - - // Addressing modes can be folded into loads and stores. Be careful that - // the store is through the expression, not of the expression though. - bool isPHI = false; - bool isAddress = isAddressUse(UsersToProcess[i].Inst, - UsersToProcess[i].OperandValToReplace); - if (isa<PHINode>(UsersToProcess[i].Inst)) { - isPHI = true; - ++NumPHI; - } + SmallVector<int64_t, 8> Offsets; + int64_t MinOffset; + int64_t MaxOffset; - if (isAddress) - HasAddress = true; + /// AllFixupsOutsideLoop - This records whether all of the fixups using this + /// LSRUse are outside of the loop, in which case some special-case heuristics + /// may be used. + bool AllFixupsOutsideLoop; - // If this use isn't an address, then not all uses are addresses. - if (!isAddress && !isPHI) - AllUsesAreAddresses = false; + /// Formulae - A list of ways to build a value that can satisfy this user. + /// After the list is populated, one of these is selected heuristically and + /// used to formulate a replacement for OperandValToReplace in UserInst. + SmallVector<Formula, 12> Formulae; - MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base, - UsersToProcess[i].Imm, isAddress, L, SE); - } - } + /// Regs - The set of register candidates used by all formulae in this LSRUse. + SmallPtrSet<const SCEV *, 4> Regs; - // If one of the use is a PHI node and all other uses are addresses, still - // allow iv reuse. Essentially we are trading one constant multiplication - // for one fewer iv. - if (NumPHI > 1) - AllUsesAreAddresses = false; + LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T), + MinOffset(INT64_MAX), + MaxOffset(INT64_MIN), + AllFixupsOutsideLoop(true) {} - // There are no in-loop address uses. - if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop)) - AllUsesAreAddresses = false; + bool InsertFormula(const Formula &F); - return CommonExprs; -} + void check() const; -/// ShouldUseFullStrengthReductionMode - Test whether full strength-reduction -/// is valid and profitable for the given set of users of a stride. In -/// full strength-reduction mode, all addresses at the current stride are -/// strength-reduced all the way down to pointer arithmetic. 
-/// -bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode( - const std::vector<BasedUser> &UsersToProcess, - const Loop *L, - bool AllUsesAreAddresses, - const SCEV *Stride) { - if (!EnableFullLSRMode) - return false; + void print(raw_ostream &OS) const; + void dump() const; +}; - // The heuristics below aim to avoid increasing register pressure, but - // fully strength-reducing all the addresses increases the number of - // add instructions, so don't do this when optimizing for size. - // TODO: If the loop is large, the savings due to simpler addresses - // may oughtweight the costs of the extra increment instructions. - if (L->getHeader()->getParent()->hasFnAttr(Attribute::OptimizeForSize)) - return false; +/// InsertFormula - If the given formula has not yet been inserted, add it to +/// the list, and return true. Return false otherwise. +bool LSRUse::InsertFormula(const Formula &F) { + SmallVector<const SCEV *, 2> Key = F.BaseRegs; + if (F.ScaledReg) Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for uniquifying. + std::sort(Key.begin(), Key.end()); - // TODO: For now, don't do full strength reduction if there could - // potentially be greater-stride multiples of the current stride - // which could reuse the current stride IV. - if (IU->StrideOrder.back() != Stride) + if (!Uniquifier.insert(Key).second) return false; - // Iterate through the uses to find conditions that automatically rule out - // full-lsr mode. - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) { - const SCEV *Base = UsersToProcess[i].Base; - const SCEV *Imm = UsersToProcess[i].Imm; - // If any users have a loop-variant component, they can't be fully - // strength-reduced. - if (Imm && !Imm->isLoopInvariant(L)) - return false; - // If there are to users with the same base and the difference between - // the two Imm values can't be folded into the address, full - // strength reduction would increase register pressure. - do { - const SCEV *CurImm = UsersToProcess[i].Imm; - if ((CurImm || Imm) && CurImm != Imm) { - if (!CurImm) CurImm = SE->getIntegerSCEV(0, Stride->getType()); - if (!Imm) Imm = SE->getIntegerSCEV(0, Stride->getType()); - const Instruction *Inst = UsersToProcess[i].Inst; - const Type *AccessTy = getAccessType(Inst); - const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); - if (!Diff->isZero() && - (!AllUsesAreAddresses || - !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true))) - return false; - } - } while (++i != e && Base == UsersToProcess[i].Base); - } + // Using a register to hold the value of 0 is not profitable. + assert((!F.ScaledReg || !F.ScaledReg->isZero()) && + "Zero allocated in a scaled register!"); +#ifndef NDEBUG + for (SmallVectorImpl<const SCEV *>::const_iterator I = + F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) + assert(!(*I)->isZero() && "Zero allocated in a base register!"); +#endif - // If there's exactly one user in this stride, fully strength-reducing it - // won't increase register pressure. If it's starting from a non-zero base, - // it'll be simpler this way. - if (UsersToProcess.size() == 1 && !UsersToProcess[0].Base->isZero()) - return true; + // Add the formula to the list. + Formulae.push_back(F); - // Otherwise, if there are any users in this stride that don't require - // a register for their base, full strength-reduction will increase - // register pressure. 
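One LSRUse, as declared above, can serve several fixups that differ only by a constant, for example the a[i], a[i+1], a[i+2] accesses produced by unrolling; each fixup carries its own Offset while the use records the minimum and maximum so legality can be checked for the whole range at once. A small standalone model of that bookkeeping (ToyUse is an invented name, not part of the patch):

#include <cassert>
#include <cstdint>
#include <limits>
#include <vector>

// Toy version of the per-use offset bookkeeping: every fixup sharing this use
// contributes one constant offset, and the use remembers the extremes.
struct ToyUse {
  std::vector<int64_t> Offsets;
  int64_t MinOffset, MaxOffset;
  ToyUse()
    : MinOffset(std::numeric_limits<int64_t>::max()),
      MaxOffset(std::numeric_limits<int64_t>::min()) {}

  void addFixupOffset(int64_t Offset) {
    Offsets.push_back(Offset);
    if (Offset < MinOffset) MinOffset = Offset;
    if (Offset > MaxOffset) MaxOffset = Offset;
  }
};

int main() {
  // Three accesses from an unrolled loop, e.g. a[i], a[i+1], a[i+2] with
  // 4-byte elements, all expressed against the same strength-reduced base.
  ToyUse U;
  U.addFixupOffset(0);
  U.addFixupOffset(4);
  U.addFixupOffset(8);
  assert(U.MinOffset == 0 && U.MaxOffset == 8 && U.Offsets.size() == 3);
  return 0;
}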
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - if (UsersToProcess[i].Base->isZero()) - return false; + // Record registers now being used by this use. + if (F.ScaledReg) Regs.insert(F.ScaledReg); + Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); - // Otherwise, go for it. return true; } -/// InsertAffinePhi Create and insert a PHI node for an induction variable -/// with the specified start and step values in the specified loop. -/// -/// If NegateStride is true, the stride should be negated by using a -/// subtract instead of an add. -/// -/// Return the created phi node. -/// -static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &Rewriter) { - assert(Start->isLoopInvariant(L) && "New PHI start is not loop invariant!"); - assert(Step->isLoopInvariant(L) && "New PHI stride is not loop invariant!"); - - BasicBlock *Header = L->getHeader(); - BasicBlock *Preheader = L->getLoopPreheader(); - BasicBlock *LatchBlock = L->getLoopLatch(); - const Type *Ty = Start->getType(); - Ty = Rewriter.SE.getEffectiveSCEVType(Ty); - - PHINode *PN = PHINode::Create(Ty, "lsr.iv", Header->begin()); - PN->addIncoming(Rewriter.expandCodeFor(Start, Ty, Preheader->getTerminator()), - Preheader); - - // If the stride is negative, insert a sub instead of an add for the - // increment. - bool isNegative = isNonConstantNegative(Step); - const SCEV *IncAmount = Step; - if (isNegative) - IncAmount = Rewriter.SE.getNegativeSCEV(Step); - - // Insert an add instruction right before the terminator corresponding - // to the back-edge or just before the only use. The location is determined - // by the caller and passed in as IVIncInsertPt. - Value *StepV = Rewriter.expandCodeFor(IncAmount, Ty, - Preheader->getTerminator()); - Instruction *IncV; - if (isNegative) { - IncV = BinaryOperator::CreateSub(PN, StepV, "lsr.iv.next", - IVIncInsertPt); - } else { - IncV = BinaryOperator::CreateAdd(PN, StepV, "lsr.iv.next", - IVIncInsertPt); - } - if (!isa<ConstantInt>(StepV)) ++NumVariable; - - PN->addIncoming(IncV, LatchBlock); - - ++NumInserted; - return PN; -} - -static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) { - // We want to emit code for users inside the loop first. To do this, we - // rearrange BasedUser so that the entries at the end have - // isUseOfPostIncrementedValue = false, because we pop off the end of the - // vector (so we handle them first). - std::partition(UsersToProcess.begin(), UsersToProcess.end(), - PartitionByIsUseOfPostIncrementedValue); - - // Sort this by base, so that things with the same base are handled - // together. By partitioning first and stable-sorting later, we are - // guaranteed that within each base we will pop off users from within the - // loop before users outside of the loop with a particular base. - // - // We would like to use stable_sort here, but we can't. The problem is that - // const SCEV *'s don't have a deterministic ordering w.r.t to each other, so - // we don't have anything to do a '<' comparison on. Because we think the - // number of uses is small, do a horrible bubble sort which just relies on - // ==. - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { - // Get a base value. - const SCEV *Base = UsersToProcess[i].Base; - - // Compact everything with this base to be consecutive with this one. 
- for (unsigned j = i+1; j != e; ++j) { - if (UsersToProcess[j].Base == Base) { - std::swap(UsersToProcess[i+1], UsersToProcess[j]); - ++i; - } - } +void LSRUse::print(raw_ostream &OS) const { + OS << "LSR Use: Kind="; + switch (Kind) { + case Basic: OS << "Basic"; break; + case Special: OS << "Special"; break; + case ICmpZero: OS << "ICmpZero"; break; + case Address: + OS << "Address of "; + if (AccessTy->isPointerTy()) + OS << "pointer"; // the full pointer type could be really verbose + else + OS << *AccessTy; } -} -/// PrepareToStrengthReduceFully - Prepare to fully strength-reduce -/// UsersToProcess, meaning lowering addresses all the way down to direct -/// pointer arithmetic. -/// -void -LoopStrengthReduce::PrepareToStrengthReduceFully( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - const Loop *L, - SCEVExpander &PreheaderRewriter) { - DEBUG(dbgs() << " Fully reducing all users\n"); - - // Rewrite the UsersToProcess records, creating a separate PHI for each - // unique Base value. - Instruction *IVIncInsertPt = L->getLoopLatch()->getTerminator(); - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ) { - // TODO: The uses are grouped by base, but not sorted. We arbitrarily - // pick the first Imm value here to start with, and adjust it for the - // other uses. - const SCEV *Imm = UsersToProcess[i].Imm; - const SCEV *Base = UsersToProcess[i].Base; - const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm); - PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L, - PreheaderRewriter); - // Loop over all the users with the same base. - do { - UsersToProcess[i].Base = SE->getIntegerSCEV(0, Stride->getType()); - UsersToProcess[i].Imm = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); - UsersToProcess[i].Phi = Phi; - assert(UsersToProcess[i].Imm->isLoopInvariant(L) && - "ShouldUseFullStrengthReductionMode should reject this!"); - } while (++i != e && Base == UsersToProcess[i].Base); - } -} - -/// FindIVIncInsertPt - Return the location to insert the increment instruction. -/// If the only use if a use of postinc value, (must be the loop termination -/// condition), then insert it just before the use. -static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess, - const Loop *L) { - if (UsersToProcess.size() == 1 && - UsersToProcess[0].isUseOfPostIncrementedValue && - L->contains(UsersToProcess[0].Inst)) - return UsersToProcess[0].Inst; - return L->getLoopLatch()->getTerminator(); -} - -/// PrepareToStrengthReduceWithNewPhi - Insert a new induction variable for the -/// given users to share. -/// -void -LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi( - std::vector<BasedUser> &UsersToProcess, - const SCEV *Stride, - const SCEV *CommonExprs, - Value *CommonBaseV, - Instruction *IVIncInsertPt, - const Loop *L, - SCEVExpander &PreheaderRewriter) { - DEBUG(dbgs() << " Inserting new PHI:\n"); - - PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV), - Stride, IVIncInsertPt, L, - PreheaderRewriter); - - // Remember this in case a later stride is multiple of this. - IVsByStride[Stride].addIV(Stride, CommonExprs, Phi); - - // All the users will share this new IV. 
- for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - UsersToProcess[i].Phi = Phi; - - DEBUG(dbgs() << " IV="); - DEBUG(WriteAsOperand(dbgs(), Phi, /*PrintType=*/false)); - DEBUG(dbgs() << "\n"); -} - -/// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to -/// reuse an induction variable with a stride that is a factor of the current -/// induction variable. -/// -void -LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride( - std::vector<BasedUser> &UsersToProcess, - Value *CommonBaseV, - const IVExpr &ReuseIV, - Instruction *PreInsertPt) { - DEBUG(dbgs() << " Rewriting in terms of existing IV of STRIDE " - << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n"); - - // All the users will share the reused IV. - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - UsersToProcess[i].Phi = ReuseIV.PHI; - - Constant *C = dyn_cast<Constant>(CommonBaseV); - if (C && - (!C->isNullValue() && - !fitsInAddressMode(SE->getUnknown(CommonBaseV), CommonBaseV->getType(), - TLI, false))) - // We want the common base emitted into the preheader! This is just - // using cast as a copy so BitCast (no-op cast) is appropriate - CommonBaseV = new BitCastInst(CommonBaseV, CommonBaseV->getType(), - "commonbase", PreInsertPt); -} - -static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset, - const Type *AccessTy, - std::vector<BasedUser> &UsersToProcess, - const TargetLowering *TLI) { - SmallVector<Instruction*, 16> AddrModeInsts; - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) { - if (UsersToProcess[i].isUseOfPostIncrementedValue) - continue; - ExtAddrMode AddrMode = - AddressingModeMatcher::Match(UsersToProcess[i].OperandValToReplace, - AccessTy, UsersToProcess[i].Inst, - AddrModeInsts, *TLI); - if (GV && GV != AddrMode.BaseGV) - return false; - if (Offset && !AddrMode.BaseOffs) - // FIXME: How to accurate check it's immediate offset is folded. - return false; - AddrModeInsts.clear(); + OS << ", Offsets={"; + for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), + E = Offsets.end(); I != E; ++I) { + OS << *I; + if (next(I) != E) + OS << ','; } - return true; -} + OS << '}'; -/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single -/// stride of IV. All of the users may have different starting values, and this -/// may not be the only stride. -void -LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *Stride, - IVUsersOfOneStride &Uses, - Loop *L) { - // If all the users are moved to another stride, then there is nothing to do. - if (Uses.Users.empty()) - return; + if (AllFixupsOutsideLoop) + OS << ", all-fixups-outside-loop"; +} - // Keep track if every use in UsersToProcess is an address. If they all are, - // we may be able to rewrite the entire collection of them in terms of a - // smaller-stride IV. - bool AllUsesAreAddresses = true; - - // Keep track if every use of a single stride is outside the loop. If so, - // we want to be more aggressive about reusing a smaller-stride IV; a - // multiply outside the loop is better than another IV inside. Well, usually. - bool AllUsesAreOutsideLoop = true; - - // Transform our list of users and offsets to a bit more complex table. In - // this new vector, each 'BasedUser' contains 'Base' the base of the strided - // access as well as the old information from Uses. We progressively move - // information from the Base field to the Imm field until we eventually have - // the full access expression to rewrite the use. 
- std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - - // Sort the UsersToProcess array so that users with common bases are - // next to each other. - SortUsersToProcess(UsersToProcess); - - // If we managed to find some expressions in common, we'll need to carry - // their value in a register and add it in for each use. This will take up - // a register operand, which potentially restricts what stride values are - // valid. - bool HaveCommonExprs = !CommonExprs->isZero(); - const Type *ReplacedTy = CommonExprs->getType(); - - // If all uses are addresses, consider sinking the immediate part of the - // common expression back into uses if they can fit in the immediate fields. - if (TLI && HaveCommonExprs && AllUsesAreAddresses) { - const SCEV *NewCommon = CommonExprs; - const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy); - MoveImmediateValues(TLI, Type::getVoidTy( - L->getLoopPreheader()->getContext()), - NewCommon, Imm, true, L, SE); - if (!Imm->isZero()) { - bool DoSink = true; - - // If the immediate part of the common expression is a GV, check if it's - // possible to fold it into the target addressing mode. - GlobalValue *GV = 0; - if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(Imm)) - GV = dyn_cast<GlobalValue>(SU->getValue()); - int64_t Offset = 0; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Imm)) - Offset = SC->getValue()->getSExtValue(); - if (GV || Offset) - // Pass VoidTy as the AccessTy to be conservative, because - // there could be multiple access types among all the uses. - DoSink = IsImmFoldedIntoAddrMode(GV, Offset, - Type::getVoidTy(L->getLoopPreheader()->getContext()), - UsersToProcess, TLI); - - if (DoSink) { - DEBUG(dbgs() << " Sinking " << *Imm << " back down into uses\n"); - for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) - UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm); - CommonExprs = NewCommon; - HaveCommonExprs = !CommonExprs->isZero(); - ++NumImmSunk; - } - } - } +void LSRUse::dump() const { + print(errs()); errs() << '\n'; +} - // Now that we know what we need to do, insert the PHI node itself. - // - DEBUG(dbgs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " - << *Stride << ":\n" - << " Common base: " << *CommonExprs << '\n'); +/// isLegalUse - Test whether the use described by AM is "legal", meaning it can +/// be completely folded into the user instruction at isel time. This includes +/// address-mode folding and special icmp tricks. +static bool isLegalUse(const TargetLowering::AddrMode &AM, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + switch (Kind) { + case LSRUse::Address: + // If we have low-level target information, ask the target if it can + // completely fold this address. + if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); + + // Otherwise, just guess that reg+reg addressing is legal. + return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; + + case LSRUse::ICmpZero: + // There's not even a target hook for querying whether it would be legal to + // fold a GV into an ICmp. + if (AM.BaseGV) + return false; - SCEVExpander Rewriter(*SE); - SCEVExpander PreheaderRewriter(*SE); + // ICmp only has two operands; don't allow more than two non-trivial parts. 
+ if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0) + return false; - BasicBlock *Preheader = L->getLoopPreheader(); - Instruction *PreInsertPt = Preheader->getTerminator(); - BasicBlock *LatchBlock = L->getLoopLatch(); - Instruction *IVIncInsertPt = LatchBlock->getTerminator(); - - Value *CommonBaseV = Constant::getNullValue(ReplacedTy); - - const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); - IVExpr ReuseIV(SE->getIntegerSCEV(0, - Type::getInt32Ty(Preheader->getContext())), - SE->getIntegerSCEV(0, - Type::getInt32Ty(Preheader->getContext())), - 0); - - // Choose a strength-reduction strategy and prepare for it by creating - // the necessary PHIs and adjusting the bookkeeping. - if (ShouldUseFullStrengthReductionMode(UsersToProcess, L, - AllUsesAreAddresses, Stride)) { - PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L, - PreheaderRewriter); - } else { - // Emit the initial base value into the loop preheader. - CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy, - PreInsertPt); - - // If all uses are addresses, check if it is possible to reuse an IV. The - // new IV must have a stride that is a multiple of the old stride; the - // multiple must be a number that can be encoded in the scale field of the - // target addressing mode; and we must have a valid instruction after this - // substitution, including the immediate field, if any. - RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses, - AllUsesAreOutsideLoop, - Stride, ReuseIV, ReplacedTy, - UsersToProcess); - if (!RewriteFactor->isZero()) - PrepareToStrengthReduceFromSmallerStride(UsersToProcess, CommonBaseV, - ReuseIV, PreInsertPt); - else { - IVIncInsertPt = FindIVIncInsertPt(UsersToProcess, L); - PrepareToStrengthReduceWithNewPhi(UsersToProcess, Stride, CommonExprs, - CommonBaseV, IVIncInsertPt, - L, PreheaderRewriter); - } - } + // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by + // putting the scaled register in the other operand of the icmp. + if (AM.Scale != 0 && AM.Scale != -1) + return false; - // Process all the users now, replacing their strided uses with - // strength-reduced forms. This outer loop handles all bases, the inner - // loop handles all users of a particular base. - while (!UsersToProcess.empty()) { - const SCEV *Base = UsersToProcess.back().Base; - Instruction *Inst = UsersToProcess.back().Inst; - - // Emit the code for Base into the preheader. - Value *BaseV = 0; - if (!Base->isZero()) { - BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); - - DEBUG(dbgs() << " INSERTING code for BASE = " << *Base << ":"); - if (BaseV->hasName()) - DEBUG(dbgs() << " Result value name = %" << BaseV->getName()); - DEBUG(dbgs() << "\n"); - - // If BaseV is a non-zero constant, make sure that it gets inserted into - // the preheader, instead of being forward substituted into the uses. We - // do this by forcing a BitCast (noop cast) to be inserted into the - // preheader in this case. - if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) && - isa<Constant>(BaseV)) { - // We want this constant emitted into the preheader! This is just - // using cast as a copy so BitCast (no-op cast) is appropriate - BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", - PreInsertPt); - } + // If we have low-level target information, ask the target if it can fold an + // integer immediate on an icmp. 
+ if (AM.BaseOffs != 0) { + if (TLI) return TLI->isLegalICmpImmediate(-AM.BaseOffs); + return false; } - // Emit the code to add the immediate offset to the Phi value, just before - // the instructions that we identified as using this stride and base. - do { - // FIXME: Use emitted users to emit other users. - BasedUser &User = UsersToProcess.back(); - - DEBUG(dbgs() << " Examining "); - if (User.isUseOfPostIncrementedValue) - DEBUG(dbgs() << "postinc"); - else - DEBUG(dbgs() << "preinc"); - DEBUG(dbgs() << " use "); - DEBUG(WriteAsOperand(dbgs(), UsersToProcess.back().OperandValToReplace, - /*PrintType=*/false)); - DEBUG(dbgs() << " in Inst: " << *User.Inst << '\n'); - - // If this instruction wants to use the post-incremented value, move it - // after the post-inc and use its value instead of the PHI. - Value *RewriteOp = User.Phi; - if (User.isUseOfPostIncrementedValue) { - RewriteOp = User.Phi->getIncomingValueForBlock(LatchBlock); - // If this user is in the loop, make sure it is the last thing in the - // loop to ensure it is dominated by the increment. In case it's the - // only use of the iv, the increment instruction is already before the - // use. - if (L->contains(User.Inst) && User.Inst != IVIncInsertPt) - User.Inst->moveBefore(IVIncInsertPt); - } - - const SCEV *RewriteExpr = SE->getUnknown(RewriteOp); - - if (SE->getEffectiveSCEVType(RewriteOp->getType()) != - SE->getEffectiveSCEVType(ReplacedTy)) { - assert(SE->getTypeSizeInBits(RewriteOp->getType()) > - SE->getTypeSizeInBits(ReplacedTy) && - "Unexpected widening cast!"); - RewriteExpr = SE->getTruncateExpr(RewriteExpr, ReplacedTy); - } - - // If we had to insert new instructions for RewriteOp, we have to - // consider that they may not have been able to end up immediately - // next to RewriteOp, because non-PHI instructions may never precede - // PHI instructions in a block. In this case, remember where the last - // instruction was inserted so that if we're replacing a different - // PHI node, we can use the later point to expand the final - // RewriteExpr. - Instruction *NewBasePt = dyn_cast<Instruction>(RewriteOp); - if (RewriteOp == User.Phi) NewBasePt = 0; - - // Clear the SCEVExpander's expression map so that we are guaranteed - // to have the code emitted where we expect it. - Rewriter.clear(); - - // If we are reusing the iv, then it must be multiplied by a constant - // factor to take advantage of the addressing mode scale component. - if (!RewriteFactor->isZero()) { - // If we're reusing an IV with a nonzero base (currently this happens - // only when all reuses are outside the loop) subtract that base here. - // The base has been used to initialize the PHI node but we don't want - // it here. - if (!ReuseIV.Base->isZero()) { - const SCEV *typedBase = ReuseIV.Base; - if (SE->getEffectiveSCEVType(RewriteExpr->getType()) != - SE->getEffectiveSCEVType(ReuseIV.Base->getType())) { - // It's possible the original IV is a larger type than the new IV, - // in which case we have to truncate the Base. We checked in - // RequiresTypeConversion that this is valid. - assert(SE->getTypeSizeInBits(RewriteExpr->getType()) < - SE->getTypeSizeInBits(ReuseIV.Base->getType()) && - "Unexpected lengthening conversion!"); - typedBase = SE->getTruncateExpr(ReuseIV.Base, - RewriteExpr->getType()); - } - RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase); - } + return true; - // Multiply old variable, with base removed, by new scale factor. 
- RewriteExpr = SE->getMulExpr(RewriteFactor, - RewriteExpr); - - // The common base is emitted in the loop preheader. But since we - // are reusing an IV, it has not been used to initialize the PHI node. - // Add it to the expression used to rewrite the uses. - // When this use is outside the loop, we earlier subtracted the - // common base, and are adding it back here. Use the same expression - // as before, rather than CommonBaseV, so DAGCombiner will zap it. - if (!CommonExprs->isZero()) { - if (L->contains(User.Inst)) - RewriteExpr = SE->getAddExpr(RewriteExpr, - SE->getUnknown(CommonBaseV)); - else - RewriteExpr = SE->getAddExpr(RewriteExpr, CommonExprs); - } - } + case LSRUse::Basic: + // Only handle single-register values. + return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; - // Now that we know what we need to do, insert code before User for the - // immediate and any loop-variant expressions. - if (BaseV) - // Add BaseV to the PHI value if needed. - RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV)); - - User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt, - Rewriter, L, this, - DeadInsts, SE); - - // Mark old value we replaced as possibly dead, so that it is eliminated - // if we just replaced the last use of that value. - DeadInsts.push_back(User.OperandValToReplace); - - UsersToProcess.pop_back(); - ++NumReduced; - - // If there are any more users to process with the same base, process them - // now. We sorted by base above, so we just have to check the last elt. - } while (!UsersToProcess.empty() && UsersToProcess.back().Base == Base); - // TODO: Next, find out which base index is the most common, pull it out. - } - - // IMPORTANT TODO: Figure out how to partition the IV's with this stride, but - // different starting values, into different PHIs. -} - -void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) { - // Note: this processes each stride/type pair individually. All users - // passed into StrengthReduceIVUsersOfStride have the same type AND stride. - // Also, note that we iterate over IVUsesByStride indirectly by using - // StrideOrder. This extra layer of indirection makes the ordering of - // strides deterministic - not dependent on map order. - for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SI->first->isLoopInvariant(L)) - continue; - StrengthReduceIVUsersOfStride(SI->first, *SI->second, L); + case LSRUse::Special: + // Only handle -1 scales, or no scale. + return AM.Scale == 0 || AM.Scale == -1; } + + return false; } -/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, -/// set the IV user and stride information and return true, otherwise return -/// false. 
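Editorial aside (not part of the patch): the new isLegalUse above is the central legality filter, encoding per use kind which addressing-mode shapes can be folded into the user at isel time and falling back to conservative guesses when no TargetLowering is available. Below is a rough standalone model of the no-TLI branches with a simplified AddrMode struct; none of these types are the real LLVM API.

#include <cstdint>
#include <iostream>

// Simplified model of an addressing mode: BaseGV + BaseOffs + BaseReg + Scale*Reg.
struct AddrMode {
  bool HasBaseGV = false;   // symbolic (global value) base present?
  int64_t BaseOffs = 0;     // constant offset
  bool HasBaseReg = false;  // base register present?
  int64_t Scale = 0;        // scale applied to the scaled register
};

enum class UseKind { Basic, Special, ICmpZero, Address };

// Mirrors the no-target-info branches of isLegalUse from the patch above.
bool isLegalUseNoTLI(const AddrMode &AM, UseKind Kind) {
  switch (Kind) {
  case UseKind::Address:
    // Without target info, guess that reg+reg addressing is legal.
    return !AM.HasBaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
  case UseKind::ICmpZero:
    if (AM.HasBaseGV) return false;                      // no GV in an icmp
    if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
      return false;                                      // too many parts
    if (AM.Scale != 0 && AM.Scale != -1) return false;   // only a -1 scale folds
    if (AM.BaseOffs != 0) return false;                  // no TLI: be conservative
    return true;
  case UseKind::Basic:
    return !AM.HasBaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
  case UseKind::Special:
    return AM.Scale == 0 || AM.Scale == -1;
  }
  return false;
}

int main() {
  AddrMode RegPlusReg;  RegPlusReg.HasBaseReg = true;  RegPlusReg.Scale = 1;
  AddrMode RegPlusImm;  RegPlusImm.HasBaseReg = true;  RegPlusImm.BaseOffs = 8;
  std::cout << isLegalUseNoTLI(RegPlusReg, UseKind::Address) << '\n'; // 1
  std::cout << isLegalUseNoTLI(RegPlusImm, UseKind::Address) << '\n'; // 0
}

With a real target, the Address and ICmpZero cases defer to isLegalAddressingMode and isLegalICmpImmediate instead of these guesses, as the patch does.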
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, - IVStrideUse *&CondUse, - const SCEV* &CondStride) { - for (unsigned Stride = 0, e = IU->StrideOrder.size(); - Stride != e && !CondUse; ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; ++UI) - if (UI->getUser() == Cond) { - // NOTE: we could handle setcc instructions with multiple uses here, but - // InstCombine does it as well for simple uses, it's not clear that it - // occurs enough in real life to handle. - CondUse = UI; - CondStride = SI->first; - return true; - } +static bool isLegalUse(TargetLowering::AddrMode AM, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + // Check for overflow. + if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != + (MinOffset > 0)) + return false; + AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; + if (isLegalUse(AM, Kind, AccessTy, TLI)) { + AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; + // Check for overflow. + if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != + (MaxOffset > 0)) + return false; + AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; + return isLegalUse(AM, Kind, AccessTy, TLI); } return false; } -namespace { - // Constant strides come first which in turns are sorted by their absolute - // values. If absolute values are the same, then positive strides comes first. - // e.g. - // 4, -1, X, 1, 2 ==> 1, -1, 2, 4, X - struct StrideCompare { - const ScalarEvolution *SE; - explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} - - bool operator()(const SCEV *LHS, const SCEV *RHS) { - const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS); - const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS); - if (LHSC && RHSC) { - int64_t LV = LHSC->getValue()->getSExtValue(); - int64_t RV = RHSC->getValue()->getSExtValue(); - uint64_t ALV = (LV < 0) ? -LV : LV; - uint64_t ARV = (RV < 0) ? -RV : RV; - if (ALV == ARV) { - if (LV != RV) - return LV > RV; - } else { - return ALV < ARV; - } +static bool isAlwaysFoldable(int64_t BaseOffs, + GlobalValue *BaseGV, + bool HasBaseReg, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI) { + // Fast-path: zero is always foldable. + if (BaseOffs == 0 && !BaseGV) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. + TargetLowering::AddrMode AM; + AM.BaseOffs = BaseOffs; + AM.BaseGV = BaseGV; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + + return isLegalUse(AM, Kind, AccessTy, TLI); +} - // If it's the same value but different type, sort by bit width so - // that we emit larger induction variables before smaller - // ones, letting the smaller be re-written in terms of larger ones. - return SE->getTypeSizeInBits(RHS->getType()) < - SE->getTypeSizeInBits(LHS->getType()); - } - return LHSC && !RHSC; - } - }; +static bool isAlwaysFoldable(const SCEV *S, + int64_t MinOffset, int64_t MaxOffset, + bool HasBaseReg, + LSRUse::KindType Kind, const Type *AccessTy, + const TargetLowering *TLI, + ScalarEvolution &SE) { + // Fast-path: zero is always foldable. + if (S->isZero()) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. 
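Editorial aside (not part of the patch): the range form of isLegalUse above probes both ends of the [MinOffset, MaxOffset] interval, and guards each adjustment of BaseOffs with an overflow check performed in unsigned arithmetic. A small sketch of just that idiom follows; the helper name is invented, and conversion of an out-of-range uint64_t back to int64_t is assumed to behave as two's complement.

#include <cstdint>
#include <iostream>

// The overflow-check idiom from the range form of isLegalUse above: do the
// addition in unsigned arithmetic (well defined on wraparound), then verify
// the result moved in the direction the signed addend says it should.
bool addOverflows(int64_t Base, int64_t Delta) {
  int64_t Sum = (int64_t)((uint64_t)Base + (uint64_t)Delta);
  return (Sum > Base) != (Delta > 0);
}

int main() {
  std::cout << addOverflows(INT64_MAX - 4, 10) << '\n'; // 1: would overflow
  std::cout << addOverflows(100, -250) << '\n';         // 0: fine
  std::cout << addOverflows(INT64_MIN + 2, -5) << '\n'; // 1: would underflow
}

The patch applies this twice: once before testing legality at BaseOffs + MinOffset and again at BaseOffs + MaxOffset, rejecting the use if either addition would wrap.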
+ int64_t BaseOffs = ExtractImmediate(S, SE); + GlobalValue *BaseGV = ExtractSymbol(S, SE); + + // If there's anything else involved, it's not foldable. + if (!S->isZero()) return false; + + // Fast-path: zero is always foldable. + if (BaseOffs == 0 && !BaseGV) return true; + + // Conservatively, create an address with an immediate and a + // base and a scale. + TargetLowering::AddrMode AM; + AM.BaseOffs = BaseOffs; + AM.BaseGV = BaseGV; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + + return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); } -/// ChangeCompareStride - If a loop termination compare instruction is the only -/// use of its stride, and the comparison is against a constant value, try to -/// eliminate the stride by moving the compare instruction to another stride and -/// changing its constant operand accordingly. E.g. -/// -/// loop: -/// ... -/// v1 = v1 + 3 -/// v2 = v2 + 1 -/// if (v2 < 10) goto loop -/// => -/// loop: -/// ... -/// v1 = v1 + 3 -/// if (v1 < 30) goto loop -ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse, - const SCEV* &CondStride, - bool PostPass) { - // If there's only one stride in the loop, there's nothing to do here. - if (IU->StrideOrder.size() < 2) - return Cond; +/// FormulaSorter - This class implements an ordering for formulae which sorts +/// the by their standalone cost. +class FormulaSorter { + /// These two sets are kept empty, so that we compute standalone costs. + DenseSet<const SCEV *> VisitedRegs; + SmallPtrSet<const SCEV *, 16> Regs; + Loop *L; + LSRUse *LU; + ScalarEvolution &SE; + DominatorTree &DT; + +public: + FormulaSorter(Loop *l, LSRUse &lu, ScalarEvolution &se, DominatorTree &dt) + : L(l), LU(&lu), SE(se), DT(dt) {} + + bool operator()(const Formula &A, const Formula &B) { + Cost CostA; + CostA.RateFormula(A, Regs, VisitedRegs, L, LU->Offsets, SE, DT); + Regs.clear(); + Cost CostB; + CostB.RateFormula(B, Regs, VisitedRegs, L, LU->Offsets, SE, DT); + Regs.clear(); + return CostA < CostB; + } +}; + +/// LSRInstance - This class holds state for the main loop strength reduction +/// logic. +class LSRInstance { + IVUsers &IU; + ScalarEvolution &SE; + DominatorTree &DT; + const TargetLowering *const TLI; + Loop *const L; + bool Changed; + + /// IVIncInsertPos - This is the insert position that the current loop's + /// induction variable increment should be placed. In simple loops, this is + /// the latch block's terminator. But in more complicated cases, this is a + /// position which will dominate all the in-loop post-increment users. + Instruction *IVIncInsertPos; + + /// Factors - Interesting factors between use strides. + SmallSetVector<int64_t, 8> Factors; + + /// Types - Interesting use types, to facilitate truncation reuse. + SmallSetVector<const Type *, 4> Types; + + /// Fixups - The list of operands which are to be replaced. + SmallVector<LSRFixup, 16> Fixups; + + /// Uses - The list of interesting uses. + SmallVector<LSRUse, 16> Uses; + + /// RegUses - Track which uses use which register candidates. 
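Editorial aside (not part of the patch): FormulaSorter above deliberately rates each formula against empty register sets, so the comparison reflects a formula's standalone cost rather than its cost relative to registers already chosen. A toy comparator in the same spirit, where cost is crudely modeled as the number of registers a formula needs; both the types and the cost model are invented for illustration.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Toy formula: cost is modeled as just the number of registers it requires.
struct Formula {
  std::vector<int> BaseRegs;
  bool HasScaledReg;
  std::size_t cost() const { return BaseRegs.size() + (HasScaledReg ? 1 : 0); }
};

// Comparator in the spirit of FormulaSorter: order formulae by standalone cost.
struct FormulaSorter {
  bool operator()(const Formula &A, const Formula &B) const {
    return A.cost() < B.cost();
  }
};

int main() {
  std::vector<Formula> Formulae = {{{1, 2, 3}, true}, {{4}, false}, {{5, 6}, false}};
  std::sort(Formulae.begin(), Formulae.end(), FormulaSorter());
  for (const Formula &F : Formulae)
    std::cout << F.cost() << ' ';        // prints: 1 2 4
  std::cout << '\n';
}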
+ RegUseTracker RegUses; + + void OptimizeShadowIV(); + bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); + ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); + bool OptimizeLoopTermCond(); + + void CollectInterestingTypesAndFactors(); + void CollectFixupsAndInitialFormulae(); + + LSRFixup &getNewFixup() { + Fixups.push_back(LSRFixup()); + return Fixups.back(); + } - // If there are other users of the condition's stride, don't bother trying to - // change the condition because the stride will still remain. - std::map<const SCEV *, IVUsersOfOneStride *>::iterator I = - IU->IVUsesByStride.find(CondStride); - if (I == IU->IVUsesByStride.end()) - return Cond; + // Support for sharing of LSRUses between LSRFixups. + typedef DenseMap<const SCEV *, size_t> UseMapTy; + UseMapTy UseMap; + + bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + LSRUse::KindType Kind, const Type *AccessTy); + + std::pair<size_t, int64_t> getUse(const SCEV *&Expr, + LSRUse::KindType Kind, + const Type *AccessTy); + +public: + void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); + void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); + void CountRegisters(const Formula &F, size_t LUIdx); + bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F); + + void CollectLoopInvariantFixupsAndFormulae(); + + void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, + unsigned Depth = 0); + void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateCrossUseConstantOffsets(); + void GenerateAllReuseFormulae(); + + void FilterOutUndesirableDedicatedRegisters(); + void NarrowSearchSpaceUsingHeuristics(); + + void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, + Cost &SolutionCost, + SmallVectorImpl<const Formula *> &Workspace, + const Cost &CurCost, + const SmallPtrSet<const SCEV *, 16> &CurRegs, + DenseSet<const SCEV *> &VisitedRegs) const; + void Solve(SmallVectorImpl<const Formula *> &Solution) const; + + Value *Expand(const LSRFixup &LF, + const Formula &F, + BasicBlock::iterator IP, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts) const; + void RewriteForPHI(PHINode *PN, const LSRFixup &LF, + const Formula &F, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts, + Pass *P) const; + void Rewrite(const LSRFixup &LF, + const Formula &F, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts, + Pass *P) const; + void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, + Pass *P); + + LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); + + bool getChanged() const { return Changed; } + + void print_factors_and_types(raw_ostream &OS) const; + void print_fixups(raw_ostream &OS) const; + void print_uses(raw_ostream &OS) const; + void print(raw_ostream &OS) const; + void dump() const; +}; - if (I->second->Users.size() > 1) { - for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(), - EE = I->second->Users.end(); II != EE; ++II) { - if (II->getUser() == Cond) - continue; - if (!isInstructionTriviallyDead(II->getUser())) - return Cond; - } - } +} - // Only handle constant strides for now. 
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride); - if (!SC) return Cond; - - ICmpInst::Predicate Predicate = Cond->getPredicate(); - int64_t CmpSSInt = SC->getValue()->getSExtValue(); - unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType()); - uint64_t SignBit = 1ULL << (BitWidth-1); - const Type *CmpTy = Cond->getOperand(0)->getType(); - const Type *NewCmpTy = NULL; - unsigned TyBits = SE->getTypeSizeInBits(CmpTy); - unsigned NewTyBits = 0; - const SCEV *NewStride = NULL; - Value *NewCmpLHS = NULL; - Value *NewCmpRHS = NULL; - int64_t Scale = 1; - const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy); - - if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) { - int64_t CmpVal = C->getValue().getSExtValue(); - - // Check the relevant induction variable for conformance to the pattern. - const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - if (!AR || !AR->isAffine()) - return Cond; - - const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); - // Check stride constant and the comparision constant signs to detect - // overflow. - if (StartC) { - if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) || - (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0)) - return Cond; - } else { - // More restrictive check for the other cases. - if ((CmpVal & SignBit) != (CmpSSInt & SignBit)) - return Cond; - } +/// OptimizeShadowIV - If IV is used in a int-to-float cast +/// inside the loop then try to eliminate the cast operation. +void LSRInstance::OptimizeShadowIV() { + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); + if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) + return; - // Look for a suitable stride / iv as replacement. - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty()) - continue; - int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue(); - if (SSInt == CmpSSInt || - abs64(SSInt) < abs64(CmpSSInt) || - (SSInt % CmpSSInt) != 0) - continue; + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); + UI != E; /* empty */) { + IVUsers::const_iterator CandidateUI = UI; + ++UI; + Instruction *ShadowUse = CandidateUI->getUser(); + const Type *DestTy = NULL; - Scale = SSInt / CmpSSInt; - int64_t NewCmpVal = CmpVal * Scale; + /* If shadow use is a int->float cast then insert a second IV + to eliminate this cast. - // If old icmp value fits in icmp immediate field, but the new one doesn't - // try something else. - if (TLI && - TLI->isLegalICmpImmediate(CmpVal) && - !TLI->isLegalICmpImmediate(NewCmpVal)) - continue; + for (unsigned i = 0; i < n; ++i) + foo((double)i); - APInt Mul = APInt(BitWidth*2, CmpVal, true); - Mul = Mul * APInt(BitWidth*2, Scale, true); - // Check for overflow. - if (!Mul.isSignedIntN(BitWidth)) - continue; - // Check for overflow in the stride's type too. - if (!Mul.isSignedIntN(SE->getTypeSizeInBits(SI->first->getType()))) - continue; + is transformed into - // Watch out for overflow. 
- if (ICmpInst::isSigned(Predicate) && - (CmpVal & SignBit) != (NewCmpVal & SignBit)) - continue; + double d = 0.0; + for (unsigned i = 0; i < n; ++i, ++d) + foo(d); + */ + if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) + DestTy = UCast->getDestTy(); + else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) + DestTy = SCast->getDestTy(); + if (!DestTy) continue; - // Pick the best iv to use trying to avoid a cast. - NewCmpLHS = NULL; - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; ++UI) { - Value *Op = UI->getOperandValToReplace(); - - // If the IVStrideUse implies a cast, check for an actual cast which - // can be used to find the original IV expression. - if (SE->getEffectiveSCEVType(Op->getType()) != - SE->getEffectiveSCEVType(SI->first->getType())) { - CastInst *CI = dyn_cast<CastInst>(Op); - // If it's not a simple cast, it's complicated. - if (!CI) - continue; - // If it's a cast from a type other than the stride type, - // it's complicated. - if (CI->getOperand(0)->getType() != SI->first->getType()) - continue; - // Ok, we found the IV expression in the stride's type. - Op = CI->getOperand(0); - } + if (TLI) { + // If target does not support DestTy natively then do not apply + // this transformation. + EVT DVT = TLI->getValueType(DestTy); + if (!TLI->isTypeLegal(DVT)) continue; + } - NewCmpLHS = Op; - if (NewCmpLHS->getType() == CmpTy) - break; - } - if (!NewCmpLHS) - continue; + PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); + if (!PH) continue; + if (PH->getNumIncomingValues() != 2) continue; - NewCmpTy = NewCmpLHS->getType(); - NewTyBits = SE->getTypeSizeInBits(NewCmpTy); - const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits); - if (RequiresTypeConversion(NewCmpTy, CmpTy)) { - // Check if it is possible to rewrite it using - // an iv / stride of a smaller integer type. - unsigned Bits = NewTyBits; - if (ICmpInst::isSigned(Predicate)) - --Bits; - uint64_t Mask = (1ULL << Bits) - 1; - if (((uint64_t)NewCmpVal & Mask) != (uint64_t)NewCmpVal) - continue; - } + const Type *SrcTy = PH->getType(); + int Mantissa = DestTy->getFPMantissaWidth(); + if (Mantissa == -1) continue; + if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) + continue; - // Don't rewrite if use offset is non-constant and the new type is - // of a different type. - // FIXME: too conservative? - if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset())) - continue; + unsigned Entry, Latch; + if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { + Entry = 0; + Latch = 1; + } else { + Entry = 1; + Latch = 0; + } - if (!PostPass) { - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - // Avoid rewriting the compare instruction with an iv of new stride - // if it's likely the new stride uses will be rewritten using the - // stride of the compare instruction. - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) - continue; - } + ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); + if (!Init) continue; + Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); - // Avoid rewriting the compare instruction with an iv which has - // implicit extension or truncation built into it. - // TODO: This is over-conservative. 
- if (SE->getTypeSizeInBits(CondUse->getOffset()->getType()) != TyBits) - continue; + BinaryOperator *Incr = + dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); + if (!Incr) continue; + if (Incr->getOpcode() != Instruction::Add + && Incr->getOpcode() != Instruction::Sub) + continue; - // If scale is negative, use swapped predicate unless it's testing - // for equality. - if (Scale < 0 && !Cond->isEquality()) - Predicate = ICmpInst::getSwappedPredicate(Predicate); + /* Initialize new IV, double d = 0.0 in above example. */ + ConstantInt *C = NULL; + if (Incr->getOperand(0) == PH) + C = dyn_cast<ConstantInt>(Incr->getOperand(1)); + else if (Incr->getOperand(1) == PH) + C = dyn_cast<ConstantInt>(Incr->getOperand(0)); + else + continue; - NewStride = IU->StrideOrder[i]; - if (!isa<PointerType>(NewCmpTy)) - NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal); - else { - Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); - NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy); - } - NewOffset = TyBits == NewTyBits - ? SE->getMulExpr(CondUse->getOffset(), - SE->getConstant(CmpTy, Scale)) - : SE->getConstant(NewCmpIntTy, - cast<SCEVConstant>(CondUse->getOffset())->getValue() - ->getSExtValue()*Scale); - break; - } - } + if (!C) continue; - // Forgo this transformation if it the increment happens to be - // unfortunately positioned after the condition, and the condition - // has multiple uses which prevent it from being moved immediately - // before the branch. See - // test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-*.ll - // for an example of this situation. - if (!Cond->hasOneUse()) { - for (BasicBlock::iterator I = Cond, E = Cond->getParent()->end(); - I != E; ++I) - if (I == NewCmpLHS) - return Cond; - } + // Ignore negative constants, as the code below doesn't handle them + // correctly. TODO: Remove this restriction. + if (!C->getValue().isStrictlyPositive()) continue; - if (NewCmpRHS) { - // Create a new compare instruction using new stride / iv. - ICmpInst *OldCond = Cond; - // Insert new compare instruction. - Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, - L->getHeader()->getName() + ".termcond"); + /* Add new PHINode. */ + PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); - DEBUG(dbgs() << " Change compare stride in Inst " << *OldCond); - DEBUG(dbgs() << " to " << *Cond << '\n'); + /* create new increment. '++d' in above example. */ + Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); + BinaryOperator *NewIncr = + BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? + Instruction::FAdd : Instruction::FSub, + NewPH, CFP, "IV.S.next.", Incr); - // Remove the old compare instruction. The old indvar is probably dead too. - DeadInsts.push_back(CondUse->getOperandValToReplace()); - OldCond->replaceAllUsesWith(Cond); - OldCond->eraseFromParent(); + NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); + NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); - IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS); - CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); - CondStride = NewStride; - ++NumEliminated; - Changed = true; + /* Remove cast operation */ + ShadowUse->replaceAllUsesWith(NewPH); + ShadowUse->eraseFromParent(); + break; } +} - return Cond; +/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, +/// set the IV user and stride information and return true, otherwise return +/// false. 
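Editorial aside (not part of the patch): OptimizeShadowIV above removes an int-to-float cast from the loop body by running a floating-point "shadow" induction variable alongside the integer one, exactly as the comment in the hunk describes. Below is a source-level before/after sketch of that transformation; the function names are illustrative. The rewrite is only attempted when the counter's bit width fits in the destination type's mantissa (the getFPMantissaWidth check above), so the FP counter can represent every integer value exactly.

#include <cstdio>

// "Before": the induction variable is converted to double on every iteration.
double sumBefore(unsigned n) {
  double Sum = 0.0;
  for (unsigned i = 0; i < n; ++i)
    Sum += (double)i;              // int->float cast inside the loop
  return Sum;
}

// "After": a shadow FP induction variable advances alongside i, so the body
// uses d directly and the cast disappears from the loop.
double sumAfter(unsigned n) {
  double Sum = 0.0;
  double d = 0.0;                  // new IV, seeded from the old start value
  for (unsigned i = 0; i < n; ++i, ++d)
    Sum += d;
  return Sum;
}

int main() {
  std::printf("%.1f %.1f\n", sumBefore(10), sumAfter(10)); // 45.0 45.0
}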
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, + IVStrideUse *&CondUse) { + for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) + if (UI->getUser() == Cond) { + // NOTE: we could handle setcc instructions with multiple uses here, but + // InstCombine does it as well for simple uses, it's not clear that it + // occurs enough in real life to handle. + CondUse = UI; + return true; + } + return false; } /// OptimizeMax - Rewrite the loop's terminating condition if it uses @@ -2088,7 +1405,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// are designed around them. The most obvious example of this is the /// LoopInfo analysis, which doesn't remember trip count values. It /// expects to be able to rediscover the trip count each time it is -/// needed, and it does this using a simple analyis that only succeeds if +/// needed, and it does this using a simple analysis that only succeeds if /// the loop has a canonical induction variable. /// /// However, when it comes time to generate code, the maximum operation @@ -2098,8 +1415,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond, /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting /// the instructions for the maximum computation. /// -ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, - IVStrideUse* &CondUse) { +ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check that the loop matches the pattern we're looking for. if (Cond->getPredicate() != CmpInst::ICMP_EQ && Cond->getPredicate() != CmpInst::ICMP_NE) @@ -2108,19 +1424,19 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1)); if (!Sel || !Sel->hasOneUse()) return Cond; - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); + const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) return Cond; - const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); + const SCEV *One = SE.getIntegerSCEV(1, BackedgeTakenCount->getType()); // Add one to the backedge-taken count to get the trip count. - const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One); + const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One); // Check for a max calculation that matches the pattern. if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) return Cond; const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount); - if (Max != SE->getSCEV(Sel)) return Cond; + if (Max != SE.getSCEV(Sel)) return Cond; // To handle a max with more than two operands, this optimization would // require additional checking and setup. @@ -2130,14 +1446,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, const SCEV *MaxLHS = Max->getOperand(0); const SCEV *MaxRHS = Max->getOperand(1); if (!MaxLHS || MaxLHS != One) return Cond; - // Check the relevant induction variable for conformance to // the pattern. 
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0)); + const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); if (!AR || !AR->isAffine() || AR->getStart() != One || - AR->getStepRecurrence(*SE) != One) + AR->getStepRecurrence(SE) != One) return Cond; assert(AR->getLoop() == L && @@ -2146,9 +1461,9 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. Value *NewRHS = 0; - if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS) + if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) NewRHS = Sel->getOperand(1); - else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS) + else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) NewRHS = Sel->getOperand(2); if (!NewRHS) return Cond; @@ -2175,552 +1490,1804 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond, return NewCond; } -/// OptimizeShadowIV - If IV is used in a int-to-float cast -/// inside the loop then try to eliminate the cast opeation. -void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { +/// OptimizeLoopTermCond - Change loop terminating condition to use the +/// postinc iv when possible. +bool +LSRInstance::OptimizeLoopTermCond() { + SmallPtrSet<Instruction *, 4> PostIncs; - const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) - return; + BasicBlock *LatchBlock = L->getLoopLatch(); + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { + BasicBlock *ExitingBlock = ExitingBlocks[i]; - for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; - ++Stride) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[Stride]); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - if (!isa<SCEVConstant>(SI->first)) + // Get the terminating condition for the loop if possible. If we + // can, we want to change it to use a post-incremented version of its + // induction variable, to allow coalescing the live ranges for the IV into + // one register value. + + BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); + if (!TermBr) + continue; + // FIXME: Overly conservative, termination condition could be an 'or' etc.. + if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) continue; - for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(), - E = SI->second->Users.end(); UI != E; /* empty */) { - ilist<IVStrideUse>::iterator CandidateUI = UI; - ++UI; - Instruction *ShadowUse = CandidateUI->getUser(); - const Type *DestTy = NULL; - - /* If shadow use is a int->float cast then insert a second IV - to eliminate this cast. - - for (unsigned i = 0; i < n; ++i) - foo((double)i); - - is transformed into - - double d = 0.0; - for (unsigned i = 0; i < n; ++i, ++d) - foo(d); - */ - if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) - DestTy = UCast->getDestTy(); - else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) - DestTy = SCast->getDestTy(); - if (!DestTy) continue; - - if (TLI) { - // If target does not support DestTy natively then do not apply - // this transformation. - EVT DVT = TLI->getValueType(DestTy); - if (!TLI->isTypeLegal(DVT)) continue; - } + // Search IVUsesByStride to find Cond's IVUse if there is one. 
+ IVStrideUse *CondUse = 0; + ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); + if (!FindIVUserForCond(Cond, CondUse)) + continue; + + // If the trip count is computed in terms of a max (due to ScalarEvolution + // being unable to find a sufficient guard, for example), change the loop + // comparison to use SLT or ULT instead of NE. + // One consequence of doing this now is that it disrupts the count-down + // optimization. That's not always a bad thing though, because in such + // cases it may still be worthwhile to avoid a max. + Cond = OptimizeMax(Cond, CondUse); + + // If this exiting block dominates the latch block, it may also use + // the post-inc value if it won't be shared with other uses. + // Check for dominance. + if (!DT.dominates(ExitingBlock, LatchBlock)) + continue; - PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); - if (!PH) continue; - if (PH->getNumIncomingValues() != 2) continue; + // Conservatively avoid trying to use the post-inc value in non-latch + // exits if there may be pre-inc users in intervening blocks. + if (LatchBlock != ExitingBlock) + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) + // Test if the use is reachable from the exiting block. This dominator + // query is a conservative approximation of reachability. + if (&*UI != CondUse && + !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { + // Conservatively assume there may be reuse if the quotient of their + // strides could be a legal scale. + const SCEV *A = CondUse->getStride(); + const SCEV *B = UI->getStride(); + if (SE.getTypeSizeInBits(A->getType()) != + SE.getTypeSizeInBits(B->getType())) { + if (SE.getTypeSizeInBits(A->getType()) > + SE.getTypeSizeInBits(B->getType())) + B = SE.getSignExtendExpr(B, A->getType()); + else + A = SE.getSignExtendExpr(A, B->getType()); + } + if (const SCEVConstant *D = + dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { + // Stride of one or negative one can have reuse with non-addresses. + if (D->getValue()->isOne() || + D->getValue()->isAllOnesValue()) + goto decline_post_inc; + // Avoid weird situations. + if (D->getValue()->getValue().getMinSignedBits() >= 64 || + D->getValue()->getValue().isMinSignedValue()) + goto decline_post_inc; + // Without TLI, assume that any stride might be valid, and so any + // use might be shared. + if (!TLI) + goto decline_post_inc; + // Check for possible scaled-address reuse. + const Type *AccessTy = getAccessType(UI->getUser()); + TargetLowering::AddrMode AM; + AM.Scale = D->getValue()->getSExtValue(); + if (TLI->isLegalAddressingMode(AM, AccessTy)) + goto decline_post_inc; + AM.Scale = -AM.Scale; + if (TLI->isLegalAddressingMode(AM, AccessTy)) + goto decline_post_inc; + } + } - const Type *SrcTy = PH->getType(); - int Mantissa = DestTy->getFPMantissaWidth(); - if (Mantissa == -1) continue; - if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa) - continue; + DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " + << *Cond << '\n'); - unsigned Entry, Latch; - if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { - Entry = 0; - Latch = 1; + // It's possible for the setcc instruction to be anywhere in the loop, and + // possible for it to have multiple users. If it is not immediately before + // the exiting block branch, move it. + if (&*++BasicBlock::iterator(Cond) != TermBr) { + if (Cond->hasOneUse()) { + Cond->moveBefore(TermBr); } else { - Entry = 1; - Latch = 0; + // Clone the terminating condition and insert into the loopend. 
+ ICmpInst *OldCond = Cond; + Cond = cast<ICmpInst>(Cond->clone()); + Cond->setName(L->getHeader()->getName() + ".termcond"); + ExitingBlock->getInstList().insert(TermBr, Cond); + + // Clone the IVUse, as the old use still exists! + CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), + Cond, CondUse->getOperandValToReplace()); + TermBr->replaceUsesOfWith(OldCond, Cond); } + } - ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); - if (!Init) continue; - Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue()); + // If we get to here, we know that we can transform the setcc instruction to + // use the post-incremented version of the IV, allowing us to coalesce the + // live ranges for the IV correctly. + CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), + CondUse->getStride())); + CondUse->setIsUseOfPostIncrementedValue(true); + Changed = true; - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); - if (!Incr) continue; - if (Incr->getOpcode() != Instruction::Add - && Incr->getOpcode() != Instruction::Sub) - continue; + PostIncs.insert(Cond); + decline_post_inc:; + } - /* Initialize new IV, double d = 0.0 in above example. */ - ConstantInt *C = NULL; - if (Incr->getOperand(0) == PH) - C = dyn_cast<ConstantInt>(Incr->getOperand(1)); - else if (Incr->getOperand(1) == PH) - C = dyn_cast<ConstantInt>(Incr->getOperand(0)); - else - continue; + // Determine an insertion point for the loop induction variable increment. It + // must dominate all the post-inc comparisons we just set up, and it must + // dominate the loop latch edge. + IVIncInsertPos = L->getLoopLatch()->getTerminator(); + for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(), + E = PostIncs.end(); I != E; ++I) { + BasicBlock *BB = + DT.findNearestCommonDominator(IVIncInsertPos->getParent(), + (*I)->getParent()); + if (BB == (*I)->getParent()) + IVIncInsertPos = *I; + else if (BB != IVIncInsertPos->getParent()) + IVIncInsertPos = BB->getTerminator(); + } + + return Changed; +} - if (!C) continue; +bool +LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + LSRUse::KindType Kind, const Type *AccessTy) { + int64_t NewMinOffset = LU.MinOffset; + int64_t NewMaxOffset = LU.MaxOffset; + const Type *NewAccessTy = AccessTy; + + // Check for a mismatched kind. It's tempting to collapse mismatched kinds to + // something conservative, however this can pessimize in the case that one of + // the uses will have all its uses outside the loop, for example. + if (LU.Kind != Kind) + return false; + // Conservatively assume HasBaseReg is true for now. + if (NewOffset < LU.MinOffset) { + if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true, + Kind, AccessTy, TLI)) + return false; + NewMinOffset = NewOffset; + } else if (NewOffset > LU.MaxOffset) { + if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true, + Kind, AccessTy, TLI)) + return false; + NewMaxOffset = NewOffset; + } + // Check for a mismatched access type, and fall back conservatively as needed. + if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) + NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + + // Update the use. + LU.MinOffset = NewMinOffset; + LU.MaxOffset = NewMaxOffset; + LU.AccessTy = NewAccessTy; + if (NewOffset != LU.Offsets.back()) + LU.Offsets.push_back(NewOffset); + return true; +} - // Ignore negative constants, as the code below doesn't handle them - // correctly. TODO: Remove this restriction. 
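Editorial aside (not part of the patch): reconcileNewOffset above decides whether a fixup with a new constant offset can share an existing LSRUse by widening the use's [MinOffset, MaxOffset] range, provided the widened spread remains foldable everywhere. A simplified sketch of that bookkeeping follows; the kind and access-type checks are omitted, and isAlwaysFoldable is stubbed out with an invented 12-bit immediate limit.

#include <cstdint>
#include <iostream>
#include <vector>

// Stub for the patch's isAlwaysFoldable: pretend offsets up to +/-4095 fold,
// roughly like a 12-bit immediate field. Purely illustrative.
bool isAlwaysFoldable(int64_t Offset) { return Offset >= -4095 && Offset <= 4095; }

struct Use {
  int64_t MinOffset, MaxOffset;
  std::vector<int64_t> Offsets;

  // Mirrors reconcileNewOffset: accept the new offset only if the resulting
  // spread between the extremes can still be folded.
  bool reconcileNewOffset(int64_t NewOffset) {
    int64_t NewMin = MinOffset, NewMax = MaxOffset;
    if (NewOffset < MinOffset) {
      if (!isAlwaysFoldable(MaxOffset - NewOffset)) return false;
      NewMin = NewOffset;
    } else if (NewOffset > MaxOffset) {
      if (!isAlwaysFoldable(NewOffset - MinOffset)) return false;
      NewMax = NewOffset;
    }
    MinOffset = NewMin;
    MaxOffset = NewMax;
    if (Offsets.empty() || NewOffset != Offsets.back())
      Offsets.push_back(NewOffset);
    return true;
  }
};

int main() {
  Use U{0, 0, {0}};
  std::cout << U.reconcileNewOffset(64) << '\n';     // 1: spread of 64 folds
  std::cout << U.reconcileNewOffset(-32) << '\n';    // 1: spread of 96 folds
  std::cout << U.reconcileNewOffset(100000) << '\n'; // 0: spread too large
}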
- if (!C->getValue().isStrictlyPositive()) continue; +/// getUse - Return an LSRUse index and an offset value for a fixup which +/// needs the given expression, with the given kind and optional access type. +/// Either reuse an existing use or create a new one, as needed. +std::pair<size_t, int64_t> +LSRInstance::getUse(const SCEV *&Expr, + LSRUse::KindType Kind, const Type *AccessTy) { + const SCEV *Copy = Expr; + int64_t Offset = ExtractImmediate(Expr, SE); + + // Basic uses can't accept any offset, for example. + if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) { + Expr = Copy; + Offset = 0; + } - /* Add new PHINode. */ - PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + std::pair<UseMapTy::iterator, bool> P = + UseMap.insert(std::make_pair(Expr, 0)); + if (!P.second) { + // A use already existed with this base. + size_t LUIdx = P.first->second; + LSRUse &LU = Uses[LUIdx]; + if (reconcileNewOffset(LU, Offset, Kind, AccessTy)) + // Reuse this use. + return std::make_pair(LUIdx, Offset); + } - /* create new increment. '++d' in above example. */ - Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); - BinaryOperator *NewIncr = - BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? - Instruction::FAdd : Instruction::FSub, - NewPH, CFP, "IV.S.next.", Incr); + // Create a new use. + size_t LUIdx = Uses.size(); + P.first->second = LUIdx; + Uses.push_back(LSRUse(Kind, AccessTy)); + LSRUse &LU = Uses[LUIdx]; - NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); - NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); + // We don't need to track redundant offsets, but we don't need to go out + // of our way here to avoid them. + if (LU.Offsets.empty() || Offset != LU.Offsets.back()) + LU.Offsets.push_back(Offset); - /* Remove cast operation */ - ShadowUse->replaceAllUsesWith(NewPH); - ShadowUse->eraseFromParent(); - NumShadow++; - break; - } - } + LU.MinOffset = Offset; + LU.MaxOffset = Offset; + return std::make_pair(LUIdx, Offset); } -/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar -/// uses in the loop, look to see if we can eliminate some, in favor of using -/// common indvars for the different uses. -void LoopStrengthReduce::OptimizeIndvars(Loop *L) { - // TODO: implement optzns here. +void LSRInstance::CollectInterestingTypesAndFactors() { + SmallSetVector<const SCEV *, 4> Strides; + + // Collect interesting types and strides. + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + const SCEV *Stride = UI->getStride(); + + // Collect interesting types. + Types.insert(SE.getEffectiveSCEVType(Stride->getType())); - OptimizeShadowIV(L); + // Add the stride for this loop. + Strides.insert(Stride); + + // Add strides for other mentioned loops. + for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); + AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) + Strides.insert(AR->getStepRecurrence(SE)); + } + + // Compute interesting factors from the set of interesting strides. 
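Editorial aside (not part of the patch): getUse above peels a constant offset off the expression (when it would still be foldable) and keys a map on the remaining base, so that fixups differing only by a constant offset share one LSRUse. A toy version with strings standing in for SCEVs follows; it collapses the patch's reconcileNewOffset call into a simple min/max widen and takes the offset pre-extracted instead of calling ExtractImmediate.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Use {
  int64_t MinOffset, MaxOffset;
};

// Toy version of LSRInstance::getUse: share a Use between fixups whose
// expressions differ only by a constant offset. The "SCEV" is just a string.
std::pair<std::size_t, int64_t>
getUse(std::map<std::string, std::size_t> &UseMap, std::vector<Use> &Uses,
       const std::string &BaseExpr, int64_t Offset) {
  auto P = UseMap.insert({BaseExpr, 0});
  if (!P.second) {
    // A use with this base already exists; reuse it and widen its range.
    std::size_t Idx = P.first->second;
    Uses[Idx].MinOffset = std::min(Uses[Idx].MinOffset, Offset);
    Uses[Idx].MaxOffset = std::max(Uses[Idx].MaxOffset, Offset);
    return {Idx, Offset};
  }
  // First time we see this base: create a new use.
  std::size_t Idx = Uses.size();
  P.first->second = Idx;
  Uses.push_back({Offset, Offset});
  return {Idx, Offset};
}

int main() {
  std::map<std::string, std::size_t> UseMap;
  std::vector<Use> Uses;
  auto A = getUse(UseMap, Uses, "{%base,+,4}", 0);
  auto B = getUse(UseMap, Uses, "{%base,+,4}", 16);  // shares the use above
  auto C = getUse(UseMap, Uses, "{%other,+,8}", 0);  // new use
  std::cout << A.first << ' ' << B.first << ' ' << C.first << '\n'; // 0 0 1
}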
+ for (SmallSetVector<const SCEV *, 4>::const_iterator + I = Strides.begin(), E = Strides.end(); I != E; ++I) + for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = + next(I); NewStrideIter != E; ++NewStrideIter) { + const SCEV *OldStride = *I; + const SCEV *NewStride = *NewStrideIter; + + if (SE.getTypeSizeInBits(OldStride->getType()) != + SE.getTypeSizeInBits(NewStride->getType())) { + if (SE.getTypeSizeInBits(OldStride->getType()) > + SE.getTypeSizeInBits(NewStride->getType())) + NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType()); + else + OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType()); + } + if (const SCEVConstant *Factor = + dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride, + SE, true))) { + if (Factor->getValue()->getValue().getMinSignedBits() <= 64) + Factors.insert(Factor->getValue()->getValue().getSExtValue()); + } else if (const SCEVConstant *Factor = + dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride, + NewStride, + SE, true))) { + if (Factor->getValue()->getValue().getMinSignedBits() <= 64) + Factors.insert(Factor->getValue()->getValue().getSExtValue()); + } + } + + // If all uses use the same type, don't bother looking for truncation-based + // reuse. + if (Types.size() == 1) + Types.clear(); + + DEBUG(print_factors_and_types(dbgs())); } -bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L, - bool CheckPreInc) { - int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(IU->StrideOrder[i]); - const SCEV *Share = SI->first; - if (!isa<SCEVConstant>(SI->first) || Share == Stride) - continue; - int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue(); - if (SSInt == SInt) - return true; // This can definitely be reused. - if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0) - continue; - int64_t Scale = SSInt / SInt; - bool AllUsesAreAddresses = true; - bool AllUsesAreOutsideLoop = true; - std::vector<BasedUser> UsersToProcess; - const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L, - AllUsesAreAddresses, - AllUsesAreOutsideLoop, - UsersToProcess); - if (AllUsesAreAddresses && - ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) { - if (!CheckPreInc) - return true; - // Any pre-inc iv use? - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (!I->isUseOfPostIncrementedValue()) - return true; +void LSRInstance::CollectFixupsAndInitialFormulae() { + for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { + // Record the uses. + LSRFixup &LF = getNewFixup(); + LF.UserInst = UI->getUser(); + LF.OperandValToReplace = UI->getOperandValToReplace(); + if (UI->isUseOfPostIncrementedValue()) + LF.PostIncLoop = L; + + LSRUse::KindType Kind = LSRUse::Basic; + const Type *AccessTy = 0; + if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { + Kind = LSRUse::Address; + AccessTy = getAccessType(LF.UserInst); + } + + const SCEV *S = IU.getCanonicalExpr(*UI); + + // Equality (== and !=) ICmps are special. We can rewrite (i == N) as + // (N - i == 0), and this allows (N - i) to be the expression that we work + // with rather than just N or i, so we can consider the register + // requirements for both N and i at the same time. 
Limiting this code to + // equality icmps is not a problem because all interesting loops use + // equality icmps, thanks to IndVarSimplify. + if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst)) + if (CI->isEquality()) { + // Swap the operands if needed to put the OperandValToReplace on the + // left, for consistency. + Value *NV = CI->getOperand(1); + if (NV == LF.OperandValToReplace) { + CI->setOperand(1, CI->getOperand(0)); + CI->setOperand(0, NV); + } + + // x == y --> x - y == 0 + const SCEV *N = SE.getSCEV(NV); + if (N->isLoopInvariant(L)) { + Kind = LSRUse::ICmpZero; + S = SE.getMinusSCEV(N, S); + } + + // -1 and the negations of all interesting strides (except the negation + // of -1) are now also interesting. + for (size_t i = 0, e = Factors.size(); i != e; ++i) + if (Factors[i] != -1) + Factors.insert(-(uint64_t)Factors[i]); + Factors.insert(-1); } + + // Set up the initial formula for this use. + std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy); + LF.LUIdx = P.first; + LF.Offset = P.second; + LSRUse &LU = Uses[LF.LUIdx]; + LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); + + // If this is the first use of this LSRUse, give it a formula. + if (LU.Formulae.empty()) { + InsertInitialFormula(S, LU, LF.LUIdx); + CountRegisters(LU.Formulae.back(), LF.LUIdx); } } - return false; + + DEBUG(print_fixups(dbgs())); } -/// isUsedByExitBranch - Return true if icmp is used by a loop terminating -/// conditional branch or it's and / or with other conditions before being used -/// as the condition. -static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) { - BasicBlock *CondBB = Cond->getParent(); - if (!L->isLoopExiting(CondBB)) - return false; - BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator()); - if (!TermBr || !TermBr->isConditional()) +void +LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { + Formula F; + F.InitialMatch(S, L, SE, DT); + bool Inserted = InsertFormula(LU, LUIdx, F); + assert(Inserted && "Initial formula already exists!"); (void)Inserted; +} + +void +LSRInstance::InsertSupplementalFormula(const SCEV *S, + LSRUse &LU, size_t LUIdx) { + Formula F; + F.BaseRegs.push_back(S); + F.AM.HasBaseReg = true; + bool Inserted = InsertFormula(LU, LUIdx, F); + assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; +} + +/// CountRegisters - Note which registers are used by the given formula, +/// updating RegUses. +void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { + if (F.ScaledReg) + RegUses.CountRegister(F.ScaledReg, LUIdx); + for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), + E = F.BaseRegs.end(); I != E; ++I) + RegUses.CountRegister(*I, LUIdx); +} + +/// InsertFormula - If the given formula has not yet been inserted, add it to +/// the list, and return true. Return false otherwise. +bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { + if (!LU.InsertFormula(F)) return false; - Value *User = *Cond->use_begin(); - Instruction *UserInst = dyn_cast<Instruction>(User); - while (UserInst && - (UserInst->getOpcode() == Instruction::And || - UserInst->getOpcode() == Instruction::Or)) { - if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB) - return false; - User = *User->use_begin(); - UserInst = dyn_cast<Instruction>(User); + CountRegisters(F, LUIdx); + return true; +} + +/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of +/// loop-invariant values which we're tracking. 
These other uses will pin these +/// values in registers, making them less profitable for elimination. +/// TODO: This currently misses non-constant addrec step registers. +/// TODO: Should this give more weight to users inside the loop? +void +LSRInstance::CollectLoopInvariantFixupsAndFormulae() { + SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end()); + SmallPtrSet<const SCEV *, 8> Inserted; + + while (!Worklist.empty()) { + const SCEV *S = Worklist.pop_back_val(); + + if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) + Worklist.insert(Worklist.end(), N->op_begin(), N->op_end()); + else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + Worklist.push_back(C->getOperand()); + else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { + Worklist.push_back(D->getLHS()); + Worklist.push_back(D->getRHS()); + } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { + if (!Inserted.insert(U)) continue; + const Value *V = U->getValue(); + if (const Instruction *Inst = dyn_cast<Instruction>(V)) + if (L->contains(Inst)) continue; + for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + const Instruction *UserInst = dyn_cast<Instruction>(*UI); + // Ignore non-instructions. + if (!UserInst) + continue; + // Ignore instructions in other functions (as can happen with + // Constants). + if (UserInst->getParent()->getParent() != L->getHeader()->getParent()) + continue; + // Ignore instructions not dominated by the loop. + const BasicBlock *UseBB = !isa<PHINode>(UserInst) ? + UserInst->getParent() : + cast<PHINode>(UserInst)->getIncomingBlock( + PHINode::getIncomingValueNumForOperand(UI.getOperandNo())); + if (!DT.dominates(L->getHeader(), UseBB)) + continue; + // Ignore uses which are part of other SCEV expressions, to avoid + // analyzing them multiple times. + if (SE.isSCEVable(UserInst->getType()) && + !isa<SCEVUnknown>(SE.getSCEV(const_cast<Instruction *>(UserInst)))) + continue; + // Ignore icmp instructions which are already being analyzed. + if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) { + unsigned OtherIdx = !UI.getOperandNo(); + Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx)); + if (SE.getSCEV(OtherOp)->hasComputableLoopEvolution(L)) + continue; + } + + LSRFixup &LF = getNewFixup(); + LF.UserInst = const_cast<Instruction *>(UserInst); + LF.OperandValToReplace = UI.getUse(); + std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0); + LF.LUIdx = P.first; + LF.Offset = P.second; + LSRUse &LU = Uses[LF.LUIdx]; + LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); + InsertSupplementalFormula(U, LU, LF.LUIdx); + CountRegisters(LU.Formulae.back(), Uses.size() - 1); + break; + } + } } - return User == TermBr; } -static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse, - ScalarEvolution *SE, Loop *L, - const TargetLowering *TLI = 0) { - if (!L->contains(Cond)) - return false; +/// CollectSubexprs - Split S into subexpressions which can be pulled out into +/// separate registers. If C is non-null, multiply each subexpression by C. +static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, + SmallVectorImpl<const SCEV *> &Ops, + ScalarEvolution &SE) { + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + // Break out add operands. + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + CollectSubexprs(*I, C, Ops, SE); + return; + } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Split a non-zero base out of an addrec. 
+ if (!AR->getStart()->isZero()) { + CollectSubexprs(SE.getAddRecExpr(SE.getIntegerSCEV(0, AR->getType()), + AR->getStepRecurrence(SE), + AR->getLoop()), C, Ops, SE); + CollectSubexprs(AR->getStart(), C, Ops, SE); + return; + } + } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { + // Break (C * (a + b + c)) into C*a + C*b + C*c. + if (Mul->getNumOperands() == 2) + if (const SCEVConstant *Op0 = + dyn_cast<SCEVConstant>(Mul->getOperand(0))) { + CollectSubexprs(Mul->getOperand(1), + C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0, + Ops, SE); + return; + } + } - if (!isa<SCEVConstant>(CondUse->getOffset())) - return false; + // Otherwise use the value itself. + Ops.push_back(C ? SE.getMulExpr(C, S) : S); +} - // Handle only tests for equality for the moment. - if (!Cond->isEquality() || !Cond->hasOneUse()) - return false; - if (!isUsedByExitBranch(Cond, L)) - return false; +/// GenerateReassociations - Split out subexpressions from adds and the bases of +/// addrecs. +void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, + Formula Base, + unsigned Depth) { + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) return; + + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEV *BaseReg = Base.BaseRegs[i]; + + SmallVector<const SCEV *, 8> AddOps; + CollectSubexprs(BaseReg, 0, AddOps, SE); + if (AddOps.size() == 1) continue; + + for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(), + JE = AddOps.end(); J != JE; ++J) { + // Don't pull a constant into a register if the constant could be folded + // into an immediate field. + if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, + Base.getNumRegs() > 1, + LU.Kind, LU.AccessTy, TLI, SE)) + continue; - Value *CondOp0 = Cond->getOperand(0); - const SCEV *IV = SE->getSCEV(CondOp0); - const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); - if (!AR || !AR->isAffine()) - return false; + // Collect all operands except *J. + SmallVector<const SCEV *, 8> InnerAddOps; + for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(), + KE = AddOps.end(); K != KE; ++K) + if (K != J) + InnerAddOps.push_back(*K); + + // Don't leave just a constant behind in a register if the constant could + // be folded into an immediate field. + if (InnerAddOps.size() == 1 && + isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset, + Base.getNumRegs() > 1, + LU.Kind, LU.AccessTy, TLI, SE)) + continue; - const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); - if (!SC || SC->getValue()->getSExtValue() < 0) - // If it's already counting down, don't do anything. - return false; + Formula F = Base; + F.BaseRegs[i] = SE.getAddExpr(InnerAddOps); + F.BaseRegs.push_back(*J); + if (InsertFormula(LU, LUIdx, F)) + // If that formula hadn't been seen before, recurse to find more like + // it. + GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1); + } + } +} - // If the RHS of the comparison is not an loop invariant, the rewrite - // cannot be done. Also bail out if it's already comparing against a zero. - // If we are checking this before cmp stride optimization, check if it's - // comparing against a already legal immediate. 
- Value *RHS = Cond->getOperand(1); - ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS); - if (!L->isLoopInvariant(RHS) || - (RHSC && RHSC->isZero()) || - (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue()))) - return false; +/// GenerateCombinations - Generate a formula consisting of all of the +/// loop-dominating registers added into a single register. +void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // This method is only interesting on a plurality of registers. + if (Base.BaseRegs.size() <= 1) return; + + Formula F = Base; + F.BaseRegs.clear(); + SmallVector<const SCEV *, 4> Ops; + for (SmallVectorImpl<const SCEV *>::const_iterator + I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) { + const SCEV *BaseReg = *I; + if (BaseReg->properlyDominates(L->getHeader(), &DT) && + !BaseReg->hasComputableLoopEvolution(L)) + Ops.push_back(BaseReg); + else + F.BaseRegs.push_back(BaseReg); + } + if (Ops.size() > 1) { + const SCEV *Sum = SE.getAddExpr(Ops); + // TODO: If Sum is zero, it probably means ScalarEvolution missed an + // opportunity to fold something. For now, just ignore such cases + // rather than proceed with zero in a register. + if (!Sum->isZero()) { + F.BaseRegs.push_back(Sum); + (void)InsertFormula(LU, LUIdx, F); + } + } +} - // Make sure the IV is only used for counting. Value may be preinc or - // postinc; 2 uses in either case. - if (!CondOp0->hasNUses(2)) - return false; +/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets. +void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // We can't add a symbolic offset if the address already contains one. + if (Base.AM.BaseGV) return; - return true; + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEV *G = Base.BaseRegs[i]; + GlobalValue *GV = ExtractSymbol(G, SE); + if (G->isZero() || !GV) + continue; + Formula F = Base; + F.AM.BaseGV = GV; + if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) + continue; + F.BaseRegs[i] = G; + (void)InsertFormula(LU, LUIdx, F); + } } -/// OptimizeLoopTermCond - Change loop terminating condition to use the -/// postinc iv when possible. -void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) { - BasicBlock *LatchBlock = L->getLoopLatch(); - bool LatchExit = L->isLoopExiting(LatchBlock); - SmallVector<BasicBlock*, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); +/// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets. +void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // TODO: For now, just add the min and max offset, because it usually isn't + // worthwhile looking at everything inbetween. 
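For each base register G, the loop below moves a constant between the addressing-mode immediate and the register: AM.BaseOffs shrinks by exactly the amount folded into G, so the candidate formula still computes the same address. A small standalone sketch of that invariant (plain integers standing in for the SCEV base register and the immediate; the values are illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t G = 1000;       // value of the base register
      int64_t BaseOffs = 16;  // immediate in the addressing mode
      int64_t Off = 8;        // one of LU.MinOffset / LU.MaxOffset

      // The rewrite tried here: fold Off into the register and subtract it
      // from the immediate.
      int64_t NewG = G + Off;
      int64_t NewBaseOffs = BaseOffs - Off;

      // The address computed by the formula is unchanged.
      assert(G + BaseOffs == NewG + NewBaseOffs);
      return 0;
    }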
+ SmallVector<int64_t, 4> Worklist; + Worklist.push_back(LU.MinOffset); + if (LU.MaxOffset != LU.MinOffset) + Worklist.push_back(LU.MaxOffset); + + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { + const SCEV *G = Base.BaseRegs[i]; + + for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), + E = Worklist.end(); I != E; ++I) { + Formula F = Base; + F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; + if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, + LU.Kind, LU.AccessTy, TLI)) { + F.BaseRegs[i] = SE.getAddExpr(G, SE.getIntegerSCEV(*I, G->getType())); + + (void)InsertFormula(LU, LUIdx, F); + } + } - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - BasicBlock *ExitingBlock = ExitingBlocks[i]; + int64_t Imm = ExtractImmediate(G, SE); + if (G->isZero() || Imm == 0) + continue; + Formula F = Base; + F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm; + if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) + continue; + F.BaseRegs[i] = G; + (void)InsertFormula(LU, LUIdx, F); + } +} - // Finally, get the terminating condition for the loop if possible. If we - // can, we want to change it to use a post-incremented version of its - // induction variable, to allow coalescing the live ranges for the IV into - // one register value. +/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up +/// the comparison. For example, x == y -> x*c == y*c. +void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, + Formula Base) { + if (LU.Kind != LSRUse::ICmpZero) return; - BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); - if (!TermBr) + // Determine the integer type for the base formula. + const Type *IntTy = Base.getType(); + if (!IntTy) return; + if (SE.getTypeSizeInBits(IntTy) > 64) return; + + // Don't do this if there is more than one offset. + if (LU.MinOffset != LU.MaxOffset) return; + + assert(!Base.AM.BaseGV && "ICmpZero use is not legal!"); + + // Check each interesting stride. + for (SmallSetVector<int64_t, 8>::const_iterator + I = Factors.begin(), E = Factors.end(); I != E; ++I) { + int64_t Factor = *I; + Formula F = Base; + + // Check that the multiplication doesn't overflow. + if (F.AM.BaseOffs == INT64_MIN && Factor == -1) continue; - // FIXME: Overly conservative, termination condition could be an 'or' etc.. - if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) + F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; + if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs) continue; - // Search IVUsesByStride to find Cond's IVUse if there is one. - IVStrideUse *CondUse = 0; - const SCEV *CondStride = 0; - ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); - if (!FindIVUserForCond(Cond, CondUse, CondStride)) + // Check that multiplying with the use offset doesn't overflow. + int64_t Offset = LU.MinOffset; + if (Offset == INT64_MIN && Factor == -1) + continue; + Offset = (uint64_t)Offset * Factor; + if (Offset / Factor != LU.MinOffset) continue; - // If the latch block is exiting and it's not a single block loop, it's - // not safe to use postinc iv in other exiting blocks. FIXME: overly - // conservative? How about icmp stride optimization? - bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock); - if (UsePostInc && ExitingBlock != LatchBlock) { - if (!Cond->hasOneUse()) - // See below, we don't want the condition to be cloned. 
- UsePostInc = false; - else { - // If exiting block is the latch block, we know it's safe and profitable - // to transform the icmp to use post-inc iv. Otherwise do so only if it - // would not reuse another iv and its iv would be reused by other uses. - // We are optimizing for the case where the icmp is the only use of the - // iv. - IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride]; - for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(), - E = StrideUses.Users.end(); I != E; ++I) { - if (I->getUser() == Cond) - continue; - if (!I->isUseOfPostIncrementedValue()) { - UsePostInc = false; - break; - } + // Check that this scale is legal. + if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) + continue; + + // Compensate for the use having MinOffset built into it. + F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; + + const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + + // Check that multiplying with each base register doesn't overflow. + for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) { + F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS); + if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i]) + goto next; + } + + // Check that multiplying with the scaled register doesn't overflow. + if (F.ScaledReg) { + F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS); + if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg) + continue; + } + + // If we make it here and it's legal, add it. + (void)InsertFormula(LU, LUIdx, F); + next:; + } +} + +/// GenerateScales - Generate stride factor reuse formulae by making use of +/// scaled-offset address modes, for example. +void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // Determine the integer type for the base formula. + const Type *IntTy = Base.getType(); + if (!IntTy) return; + + // If this Formula already has a scaled register, we can't add another one. + if (Base.AM.Scale != 0) return; + + // Check each interesting stride. + for (SmallSetVector<int64_t, 8>::const_iterator + I = Factors.begin(), E = Factors.end(); I != E; ++I) { + int64_t Factor = *I; + + Base.AM.Scale = Factor; + Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; + // Check whether this scale is going to be legal. + if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) { + // As a special-case, handle special out-of-loop Basic users specially. + // TODO: Reconsider this special case. + if (LU.Kind == LSRUse::Basic && + isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, + LSRUse::Special, LU.AccessTy, TLI) && + LU.AllFixupsOutsideLoop) + LU.Kind = LSRUse::Special; + else + continue; + } + // For an ICmpZero, negating a solitary base register won't lead to + // new solutions. + if (LU.Kind == LSRUse::ICmpZero && + !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV) + continue; + // For each addrec base reg, apply the scale, if possible. + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + if (const SCEVAddRecExpr *AR = + dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) { + const SCEV *FactorS = SE.getIntegerSCEV(Factor, IntTy); + if (FactorS->isZero()) + continue; + // Divide out the factor, ignoring high bits, since we'll be + // scaling the value back up in the end. + if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) { + // TODO: This could be optimized to avoid all the copying. 
+ Formula F = Base; + F.ScaledReg = Quotient; + std::swap(F.BaseRegs[i], F.BaseRegs.back()); + F.BaseRegs.pop_back(); + (void)InsertFormula(LU, LUIdx, F); } } + } +} - // If iv for the stride might be shared and any of the users use pre-inc - // iv might be used, then it's not safe to use post-inc iv. - if (UsePostInc && - isa<SCEVConstant>(CondStride) && - StrideMightBeShared(CondStride, L, true)) - UsePostInc = false; - } +/// GenerateTruncates - Generate reuse formulae from different IV types. +void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, + Formula Base) { + // This requires TargetLowering to tell us which truncates are free. + if (!TLI) return; + + // Don't bother truncating symbolic values. + if (Base.AM.BaseGV) return; + + // Determine the integer type for the base formula. + const Type *DstTy = Base.getType(); + if (!DstTy) return; + DstTy = SE.getEffectiveSCEVType(DstTy); + + for (SmallSetVector<const Type *, 4>::const_iterator + I = Types.begin(), E = Types.end(); I != E; ++I) { + const Type *SrcTy = *I; + if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { + Formula F = Base; + + if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I); + for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(), + JE = F.BaseRegs.end(); J != JE; ++J) + *J = SE.getAnyExtendExpr(*J, SrcTy); + + // TODO: This assumes we've done basic processing on all uses and + // have an idea what the register usage is. + if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses)) + continue; - // If the trip count is computed in terms of a max (due to ScalarEvolution - // being unable to find a sufficient guard, for example), change the loop - // comparison to use SLT or ULT instead of NE. - Cond = OptimizeMax(L, Cond, CondUse); - - // If possible, change stride and operands of the compare instruction to - // eliminate one stride. However, avoid rewriting the compare instruction - // with an iv of new stride if it's likely the new stride uses will be - // rewritten using the stride of the compare instruction. - if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) { - // If the condition stride is a constant and it's the only use, we might - // want to optimize it first by turning it to count toward zero. - if (!StrideMightBeShared(CondStride, L, false) && - !ShouldCountToZero(Cond, CondUse, SE, L, TLI)) - Cond = ChangeCompareStride(L, Cond, CondUse, CondStride); + (void)InsertFormula(LU, LUIdx, F); } + } +} + +namespace { + +/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to +/// defer modifications so that the search phase doesn't have to worry about +/// the data structures moving underneath it. +struct WorkItem { + size_t LUIdx; + int64_t Imm; + const SCEV *OrigReg; + + WorkItem(size_t LI, int64_t I, const SCEV *R) + : LUIdx(LI), Imm(I), OrigReg(R) {} + + void print(raw_ostream &OS) const; + void dump() const; +}; + +} + +void WorkItem::print(raw_ostream &OS) const { + OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx + << " , add offset " << Imm; +} + +void WorkItem::dump() const { + print(errs()); errs() << '\n'; +} - if (!UsePostInc) +/// GenerateCrossUseConstantOffsets - Look for registers which are a constant +/// distance apart and try to form reuse opportunities between them. +void LSRInstance::GenerateCrossUseConstantOffsets() { + // Group the registers by their value without any added constant offset. 
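The grouping below strips any constant addend off each register, so registers that differ only by a constant land in the same bucket; the immediate differences within a bucket become candidate reuse offsets. A minimal standalone sketch of that bucketing, with a register modelled as a symbolic base plus a constant (the Reg struct and the base strings are purely illustrative):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // Stands in for a SCEV after ExtractImmediate has peeled off the constant.
    struct Reg { std::string Base; int64_t Imm; };

    int main() {
      // Two uses ended up wanting {A,+,4} and {A,+,4}+8 as registers.
      std::vector<Reg> Regs = {{"{A,+,4}", 0}, {"{A,+,4}", 8}, {"B", 0}};

      // Group by the base value with the constant stripped off.
      std::map<std::string, std::vector<int64_t>> Map;
      for (const Reg &R : Regs) Map[R.Base].push_back(R.Imm);

      // Within a group, the pairwise differences are the offsets one use can
      // add to its immediate in order to reuse another use's register.
      for (const auto &Entry : Map)
        if (Entry.second.size() > 1)
          for (std::size_t I = 0; I + 1 < Entry.second.size(); ++I)
            std::printf("%s: reuse offset %lld\n", Entry.first.c_str(),
                        (long long)(Entry.second[I + 1] - Entry.second[I]));
      return 0;
    }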
+ typedef std::map<int64_t, const SCEV *> ImmMapTy; + typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy; + RegMapTy Map; + DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap; + SmallVector<const SCEV *, 8> Sequence; + for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end(); + I != E; ++I) { + const SCEV *Reg = *I; + int64_t Imm = ExtractImmediate(Reg, SE); + std::pair<RegMapTy::iterator, bool> Pair = + Map.insert(std::make_pair(Reg, ImmMapTy())); + if (Pair.second) + Sequence.push_back(Reg); + Pair.first->second.insert(std::make_pair(Imm, *I)); + UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I); + } + + // Now examine each set of registers with the same base value. Build up + // a list of work to do and do the work in a separate step so that we're + // not adding formulae and register counts while we're searching. + SmallVector<WorkItem, 32> WorkItems; + SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems; + for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(), + E = Sequence.end(); I != E; ++I) { + const SCEV *Reg = *I; + const ImmMapTy &Imms = Map.find(Reg)->second; + + // It's not worthwhile looking for reuse if there's only one offset. + if (Imms.size() == 1) continue; - DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " - << *Cond << '\n'); + DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':'; + for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end(); + J != JE; ++J) + dbgs() << ' ' << J->first; + dbgs() << '\n'); - // It's possible for the setcc instruction to be anywhere in the loop, and - // possible for it to have multiple users. If it is not immediately before - // the exiting block branch, move it. - if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) { - if (Cond->hasOneUse()) { // Condition has a single use, just move it. - Cond->moveBefore(TermBr); + // Examine each offset. + for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end(); + J != JE; ++J) { + const SCEV *OrigReg = J->second; + + int64_t JImm = J->first; + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg); + + if (!isa<SCEVConstant>(OrigReg) && + UsedByIndicesMap[Reg].count() == 1) { + DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n'); + continue; + } + + // Conservatively examine offsets between this orig reg a few selected + // other orig regs. + ImmMapTy::const_iterator OtherImms[] = { + Imms.begin(), prior(Imms.end()), + Imms.upper_bound((Imms.begin()->first + prior(Imms.end())->first) / 2) + }; + for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) { + ImmMapTy::const_iterator M = OtherImms[i]; + if (M == J || M == JE) continue; + + // Compute the difference between the two. + int64_t Imm = (uint64_t)JImm - M->first; + for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1; + LUIdx = UsedByIndices.find_next(LUIdx)) + // Make a memo of this use, offset, and register tuple. + if (UniqueItems.insert(std::make_pair(LUIdx, Imm))) + WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg)); + } + } + } + + Map.clear(); + Sequence.clear(); + UsedByIndicesMap.clear(); + UniqueItems.clear(); + + // Now iterate through the worklist and add new formulae. 
+ for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(), + E = WorkItems.end(); I != E; ++I) { + const WorkItem &WI = *I; + size_t LUIdx = WI.LUIdx; + LSRUse &LU = Uses[LUIdx]; + int64_t Imm = WI.Imm; + const SCEV *OrigReg = WI.OrigReg; + + const Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); + const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); + unsigned BitWidth = SE.getTypeSizeInBits(IntTy); + + // TODO: Use a more targeted data structure. + for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { + Formula F = LU.Formulae[L]; + // Use the immediate in the scaled register. + if (F.ScaledReg == OrigReg) { + int64_t Offs = (uint64_t)F.AM.BaseOffs + + Imm * (uint64_t)F.AM.Scale; + // Don't create 50 + reg(-50). + if (F.referencesReg(SE.getSCEV( + ConstantInt::get(IntTy, -(uint64_t)Offs)))) + continue; + Formula NewF = F; + NewF.AM.BaseOffs = Offs; + if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) + continue; + NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); + + // If the new scale is a constant in a register, and adding the constant + // value to the immediate would produce a value closer to zero than the + // immediate itself, then the formula isn't worthwhile. + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) + if (C->getValue()->getValue().isNegative() != + (NewF.AM.BaseOffs < 0) && + (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) + .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs())) + continue; + + // OK, looks good. + (void)InsertFormula(LU, LUIdx, NewF); } else { - // Otherwise, clone the terminating condition and insert into the - // loopend. - Cond = cast<ICmpInst>(Cond->clone()); - Cond->setName(L->getHeader()->getName() + ".termcond"); - ExitingBlock->getInstList().insert(TermBr, Cond); + // Use the immediate in a base register. + for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) { + const SCEV *BaseReg = F.BaseRegs[N]; + if (BaseReg != OrigReg) + continue; + Formula NewF = F; + NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; + if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI)) + continue; + NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg); + + // If the new formula has a constant in a register, and adding the + // constant value to the immediate would produce a value closer to + // zero than the immediate itself, then the formula isn't worthwhile. + for (SmallVectorImpl<const SCEV *>::const_iterator + J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end(); + J != JE; ++J) + if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) + if (C->getValue()->getValue().isNegative() != + (NewF.AM.BaseOffs < 0) && + C->getValue()->getValue().abs() + .ule(APInt(BitWidth, NewF.AM.BaseOffs).abs())) + goto skip_formula; + + // Ok, looks good. + (void)InsertFormula(LU, LUIdx, NewF); + break; + skip_formula:; + } + } + } + } +} - // Clone the IVUse, as the old use still exists! - IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond, - CondUse->getOperandValToReplace()); - CondUse = &IU->IVUsesByStride[CondStride]->Users.back(); +/// GenerateAllReuseFormulae - Generate formulae for each use. +void +LSRInstance::GenerateAllReuseFormulae() { + // This is split into multiple loops so that hasRegsUsedByUsesOtherThan + // queries are more precise. 
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateReassociations(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateCombinations(LU, LUIdx, LU.Formulae[i]); + } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateScales(LU, LUIdx, LU.Formulae[i]); + } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateTruncates(LU, LUIdx, LU.Formulae[i]); + } + + GenerateCrossUseConstantOffsets(); +} + +/// If their are multiple formulae with the same set of registers used +/// by other uses, pick the best one and delete the others. +void LSRInstance::FilterOutUndesirableDedicatedRegisters() { +#ifndef NDEBUG + bool Changed = false; +#endif + + // Collect the best formula for each unique set of shared registers. This + // is reset for each use. + typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo> + BestFormulaeTy; + BestFormulaeTy BestFormulae; + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + FormulaSorter Sorter(L, LU, SE, DT); + + // Clear out the set of used regs; it will be recomputed. + LU.Regs.clear(); + + for (size_t FIdx = 0, NumForms = LU.Formulae.size(); + FIdx != NumForms; ++FIdx) { + Formula &F = LU.Formulae[FIdx]; + + SmallVector<const SCEV *, 2> Key; + for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), + JE = F.BaseRegs.end(); J != JE; ++J) { + const SCEV *Reg = *J; + if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx)) + Key.push_back(Reg); } + if (F.ScaledReg && + RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx)) + Key.push_back(F.ScaledReg); + // Unstable sort by host order ok, because this is only used for + // uniquifying. + std::sort(Key.begin(), Key.end()); + + std::pair<BestFormulaeTy::const_iterator, bool> P = + BestFormulae.insert(std::make_pair(Key, FIdx)); + if (!P.second) { + Formula &Best = LU.Formulae[P.first->second]; + if (Sorter.operator()(F, Best)) + std::swap(F, Best); + DEBUG(dbgs() << "Filtering out "; F.print(dbgs()); + dbgs() << "\n" + " in favor of "; Best.print(dbgs()); + dbgs() << '\n'); +#ifndef NDEBUG + Changed = true; +#endif + std::swap(F, LU.Formulae.back()); + LU.Formulae.pop_back(); + --FIdx; + --NumForms; + continue; + } + if (F.ScaledReg) LU.Regs.insert(F.ScaledReg); + LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); } + BestFormulae.clear(); + } - // If we get to here, we know that we can transform the setcc instruction to - // use the post-incremented version of the IV, allowing us to coalesce the - // live ranges for the IV correctly. 
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride)); - CondUse->setIsUseOfPostIncrementedValue(true); - Changed = true; + DEBUG(if (Changed) { + dbgs() << "\n" + "After filtering out undesirable candidates:\n"; + print_uses(dbgs()); + }); +} - ++NumLoopCond; +/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of +/// formulae to choose from, use some rough heuristics to prune down the number +/// of formulae. This keeps the main solver from taking an extraordinary amount +/// of time in some worst-case scenarios. +void LSRInstance::NarrowSearchSpaceUsingHeuristics() { + // This is a rough guess that seems to work fairly well. + const size_t Limit = UINT16_MAX; + + SmallPtrSet<const SCEV *, 4> Taken; + for (;;) { + // Estimate the worst-case number of solutions we might consider. We almost + // never consider this many solutions because we prune the search space, + // but the pruning isn't always sufficient. + uint32_t Power = 1; + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + size_t FSize = I->Formulae.size(); + if (FSize >= Limit) { + Power = Limit; + break; + } + Power *= FSize; + if (Power >= Limit) + break; + } + if (Power < Limit) + break; + + // Ok, we have too many of formulae on our hands to conveniently handle. + // Use a rough heuristic to thin out the list. + + // Pick the register which is used by the most LSRUses, which is likely + // to be a good reuse register candidate. + const SCEV *Best = 0; + unsigned BestNum = 0; + for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end(); + I != E; ++I) { + const SCEV *Reg = *I; + if (Taken.count(Reg)) + continue; + if (!Best) + Best = Reg; + else { + unsigned Count = RegUses.getUsedByIndices(Reg).count(); + if (Count > BestNum) { + Best = Reg; + BestNum = Count; + } + } + } + + DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best + << " will yield profitable reuse.\n"); + Taken.insert(Best); + + // In any use with formulae which references this register, delete formulae + // which don't reference it. + for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + LSRUse &LU = *I; + if (!LU.Regs.count(Best)) continue; + + // Clear out the set of used regs; it will be recomputed. + LU.Regs.clear(); + + for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { + Formula &F = LU.Formulae[i]; + if (!F.referencesReg(Best)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); + std::swap(LU.Formulae.back(), F); + LU.Formulae.pop_back(); + --e; + --i; + continue; + } + + if (F.ScaledReg) LU.Regs.insert(F.ScaledReg); + LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + } + } + + DEBUG(dbgs() << "After pre-selection:\n"; + print_uses(dbgs())); } } -bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride, - IVStrideUse* &CondUse, - Loop *L) { - // If the only use is an icmp of a loop exiting conditional branch, then - // attempt the optimization. - BasedUser User = BasedUser(*CondUse, SE); - assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!"); - ICmpInst *Cond = cast<ICmpInst>(User.Inst); +/// SolveRecurse - This is the recursive solver. 
+void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, + Cost &SolutionCost, + SmallVectorImpl<const Formula *> &Workspace, + const Cost &CurCost, + const SmallPtrSet<const SCEV *, 16> &CurRegs, + DenseSet<const SCEV *> &VisitedRegs) const { + // Some ideas: + // - prune more: + // - use more aggressive filtering + // - sort the formula so that the most profitable solutions are found first + // - sort the uses too + // - search faster: + // - don't compute a cost, and then compare. compare while computing a cost + // and bail early. + // - track register sets with SmallBitVector + + const LSRUse &LU = Uses[Workspace.size()]; + + // If this use references any register that's already a part of the + // in-progress solution, consider it a requirement that a formula must + // reference that register in order to be considered. This prunes out + // unprofitable searching. + SmallSetVector<const SCEV *, 4> ReqRegs; + for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(), + E = CurRegs.end(); I != E; ++I) + if (LU.Regs.count(*I)) + ReqRegs.insert(*I); + + bool AnySatisfiedReqRegs = false; + SmallPtrSet<const SCEV *, 16> NewRegs; + Cost NewCost; +retry: + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + + // Ignore formulae which do not use any of the required registers. + for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(), + JE = ReqRegs.end(); J != JE; ++J) { + const SCEV *Reg = *J; + if ((!F.ScaledReg || F.ScaledReg != Reg) && + std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) == + F.BaseRegs.end()) + goto skip; + } + AnySatisfiedReqRegs = true; + + // Evaluate the cost of the current formula. If it's already worse than + // the current best, prune the search at that point. + NewCost = CurCost; + NewRegs = CurRegs; + NewCost.RateFormula(F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT); + if (NewCost < SolutionCost) { + Workspace.push_back(&F); + if (Workspace.size() != Uses.size()) { + SolveRecurse(Solution, SolutionCost, Workspace, NewCost, + NewRegs, VisitedRegs); + if (F.getNumRegs() == 1 && Workspace.size() == 1) + VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]); + } else { + DEBUG(dbgs() << "New best at "; NewCost.print(dbgs()); + dbgs() << ". Regs:"; + for (SmallPtrSet<const SCEV *, 16>::const_iterator + I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I) + dbgs() << ' ' << **I; + dbgs() << '\n'); + + SolutionCost = NewCost; + Solution = Workspace; + } + Workspace.pop_back(); + } + skip:; + } - // Less strict check now that compare stride optimization is done. - if (!ShouldCountToZero(Cond, CondUse, SE, L)) - return false; + // If none of the formulae had all of the required registers, relax the + // constraint so that we don't exclude all formulae. + if (!AnySatisfiedReqRegs) { + ReqRegs.clear(); + goto retry; + } +} - Value *CondOp0 = Cond->getOperand(0); - PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0); - Instruction *Incr; - if (!PHIExpr) { - // Value tested is postinc. Find the phi node. - Incr = dyn_cast<BinaryOperator>(CondOp0); - // FIXME: Just use User.OperandValToReplace here? 
- if (!Incr || Incr->getOpcode() != Instruction::Add) - return false; +void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { + SmallVector<const Formula *, 8> Workspace; + Cost SolutionCost; + SolutionCost.Loose(); + Cost CurCost; + SmallPtrSet<const SCEV *, 16> CurRegs; + DenseSet<const SCEV *> VisitedRegs; + Workspace.reserve(Uses.size()); + + SolveRecurse(Solution, SolutionCost, Workspace, CurCost, + CurRegs, VisitedRegs); + + // Ok, we've now made all our decisions. + DEBUG(dbgs() << "\n" + "The chosen solution requires "; SolutionCost.print(dbgs()); + dbgs() << ":\n"; + for (size_t i = 0, e = Uses.size(); i != e; ++i) { + dbgs() << " "; + Uses[i].print(dbgs()); + dbgs() << "\n" + " "; + Solution[i]->print(dbgs()); + dbgs() << '\n'; + }); +} - PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0)); - if (!PHIExpr) - return false; - // 1 use for preinc value, the increment. - if (!PHIExpr->hasOneUse()) - return false; - } else { - assert(isa<PHINode>(CondOp0) && - "Unexpected loop exiting counting instruction sequence!"); - PHIExpr = cast<PHINode>(CondOp0); - // Value tested is preinc. Find the increment. - // A CmpInst is not a BinaryOperator; we depend on this. - Instruction::use_iterator UI = PHIExpr->use_begin(); - Incr = dyn_cast<BinaryOperator>(UI); - if (!Incr) - Incr = dyn_cast<BinaryOperator>(++UI); - // One use for postinc value, the phi. Unnecessarily conservative? - if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add) - return false; +/// getImmediateDominator - A handy utility for the specific DominatorTree +/// query that we need here. +/// +static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) { + DomTreeNode *Node = DT.getNode(BB); + if (!Node) return 0; + Node = Node->getIDom(); + if (!Node) return 0; + return Node->getBlock(); +} + +Value *LSRInstance::Expand(const LSRFixup &LF, + const Formula &F, + BasicBlock::iterator IP, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts) const { + const LSRUse &LU = Uses[LF.LUIdx]; + + // Then, collect some instructions which we will remain dominated by when + // expanding the replacement. These must be dominated by any operands that + // will be required in the expansion. + SmallVector<Instruction *, 4> Inputs; + if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) + Inputs.push_back(I); + if (LU.Kind == LSRUse::ICmpZero) + if (Instruction *I = + dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) + Inputs.push_back(I); + if (LF.PostIncLoop) { + if (!L->contains(LF.UserInst)) + Inputs.push_back(L->getLoopLatch()->getTerminator()); + else + Inputs.push_back(IVIncInsertPos); } - // Replace the increment with a decrement. - DEBUG(dbgs() << "LSR: Examining use "); - DEBUG(WriteAsOperand(dbgs(), CondOp0, /*PrintType=*/false)); - DEBUG(dbgs() << " in Inst: " << *Cond << '\n'); - BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub, - Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr); - Incr->replaceAllUsesWith(Decr); - Incr->eraseFromParent(); - - // Substitute endval-startval for the original startval, and 0 for the - // original endval. Since we're only testing for equality this is OK even - // if the computation wraps around. - BasicBlock *Preheader = L->getLoopPreheader(); - Instruction *PreInsertPt = Preheader->getTerminator(); - unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 
1 : 0; - Value *StartVal = PHIExpr->getIncomingValue(InBlock); - Value *EndVal = Cond->getOperand(1); - DEBUG(dbgs() << " Optimize loop counting iv to count down [" - << *EndVal << " .. " << *StartVal << "]\n"); - - // FIXME: check for case where both are constant. - Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0); - BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub, - EndVal, StartVal, "tmp", PreInsertPt); - PHIExpr->setIncomingValue(InBlock, NewStartVal); - Cond->setOperand(1, Zero); - DEBUG(dbgs() << " New icmp: " << *Cond << "\n"); - - int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue(); - const SCEV *NewStride = 0; - bool Found = false; - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *OldStride = IU->StrideOrder[i]; - if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride)) - if (SC->getValue()->getSExtValue() == -SInt) { - Found = true; - NewStride = OldStride; + // Then, climb up the immediate dominator tree as far as we can go while + // still being dominated by the input positions. + for (;;) { + bool AllDominate = true; + Instruction *BetterPos = 0; + BasicBlock *IDom = getImmediateDominator(IP->getParent(), DT); + if (!IDom) break; + Instruction *Tentative = IDom->getTerminator(); + for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(), + E = Inputs.end(); I != E; ++I) { + Instruction *Inst = *I; + if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { + AllDominate = false; break; } + if (IDom == Inst->getParent() && + (!BetterPos || DT.dominates(BetterPos, Inst))) + BetterPos = next(BasicBlock::iterator(Inst)); + } + if (!AllDominate) + break; + if (BetterPos) + IP = BetterPos; + else + IP = Tentative; } + while (isa<PHINode>(IP)) ++IP; + + // Inform the Rewriter if we have a post-increment use, so that it can + // perform an advantageous expansion. + Rewriter.setPostInc(LF.PostIncLoop); + + // This is the type that the user actually needs. + const Type *OpTy = LF.OperandValToReplace->getType(); + // This will be the type that we'll initially expand to. + const Type *Ty = F.getType(); + if (!Ty) + // No type known; just expand directly to the ultimate type. + Ty = OpTy; + else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy)) + // Expand directly to the ultimate type if it's the right size. + Ty = OpTy; + // This is the type to do integer arithmetic in. + const Type *IntTy = SE.getEffectiveSCEVType(Ty); + + // Build up a list of operands to add together to form the full base. + SmallVector<const SCEV *, 8> Ops; + + // Expand the BaseRegs portion. + for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), + E = F.BaseRegs.end(); I != E; ++I) { + const SCEV *Reg = *I; + assert(!Reg->isZero() && "Zero allocated in a base register!"); + + // If we're expanding for a post-inc user for the add-rec's loop, make the + // post-inc adjustment. + const SCEV *Start = Reg; + while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { + if (AR->getLoop() == LF.PostIncLoop) { + Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)); + // If the user is inside the loop, insert the code after the increment + // so that it is dominated by its operand. If the original insert point + // was already dominated by the increment, keep it, because there may + // be loop-variant operands that need to be respected also. 
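The post-inc adjustment made a few lines above (adding one step recurrence to Reg) works because the post-incremented value at iteration i is simply the pre-incremented value plus the step. A minimal sketch with plain integers in place of a SCEV add-rec {Start,+,Step}:

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t Start = 100, Step = 4;
      for (int64_t I = 0; I != 8; ++I) {
        int64_t PreInc  = Start + I * Step;        // IV value inside iteration I
        int64_t PostInc = Start + (I + 1) * Step;  // value after the increment
        // The adjustment performed during expansion: add one Step.
        assert(PostInc == PreInc + Step);
      }
      return 0;
    }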
+ if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) + IP = IVIncInsertPos; + break; + } + Start = AR->getStart(); + } - if (!Found) - NewStride = SE->getIntegerSCEV(-SInt, Stride->getType()); - IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0)); - IU->IVUsesByStride[Stride]->removeUser(CondUse); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); + } - CondUse = &IU->IVUsesByStride[NewStride]->Users.back(); - Stride = NewStride; + // Flush the operand list to suppress SCEVExpander hoisting. + if (!Ops.empty()) { + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } - ++NumCountZero; + // Expand the ScaledReg portion. + Value *ICmpScaledV = 0; + if (F.AM.Scale != 0) { + const SCEV *ScaledS = F.ScaledReg; + + // If we're expanding for a post-inc user for the add-rec's loop, make the + // post-inc adjustment. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) + if (AR->getLoop() == LF.PostIncLoop) + ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); + + if (LU.Kind == LSRUse::ICmpZero) { + // An interesting way of "folding" with an icmp is to use a negated + // scale, which we'll implement by inserting it into the other operand + // of the icmp. + assert(F.AM.Scale == -1 && + "The only scale supported by ICmpZero uses is -1!"); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); + } else { + // Otherwise just expand the scaled register and an explicit scale, + // which is expected to be matched as part of the address. + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); + ScaledS = SE.getMulExpr(ScaledS, + SE.getIntegerSCEV(F.AM.Scale, + ScaledS->getType())); + Ops.push_back(ScaledS); + + // Flush the operand list to suppress SCEVExpander hoisting. + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } + } - return true; + // Expand the GV portion. + if (F.AM.BaseGV) { + Ops.push_back(SE.getUnknown(F.AM.BaseGV)); + + // Flush the operand list to suppress SCEVExpander hoisting. + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Ops.clear(); + Ops.push_back(SE.getUnknown(FullV)); + } + + // Expand the immediate portion. + int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset; + if (Offset != 0) { + if (LU.Kind == LSRUse::ICmpZero) { + // The other interesting way of "folding" with an ICmpZero is to use a + // negated immediate. + if (!ICmpScaledV) + ICmpScaledV = ConstantInt::get(IntTy, -Offset); + else { + Ops.push_back(SE.getUnknown(ICmpScaledV)); + ICmpScaledV = ConstantInt::get(IntTy, Offset); + } + } else { + // Just add the immediate values. These again are expected to be matched + // as part of the address. + Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset))); + } + } + + // Emit instructions summing all the operands. + const SCEV *FullS = Ops.empty() ? + SE.getIntegerSCEV(0, IntTy) : + SE.getAddExpr(Ops); + Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); + + // We're done expanding now, so reset the rewriter. + Rewriter.setPostInc(0); + + // An ICmpZero Formula represents an ICmp which we're handling as a + // comparison against zero. Now that we've expanded an expression for that + // form, update the ICmp's other operand. 
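The block that follows rewrites the comparison itself: an ICmpZero use models "icmp eq x, N" as the expression x - N compared against zero, so once that expression has been expanded the icmp's other operand must be replaced, either with zero or with a negated immediate/scaled value when that folds better. A standalone sketch of the underlying identities (plain integers, illustrative values, no LLVM types):

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t X = 37, N = 37, Offset = -5;

      // The basic ICmpZero rewrite: x == N  <=>  (x - N) == 0.
      assert((X == N) == ((X - N) == 0));

      // Folding an immediate by negating it rather than materializing an add:
      // (x + Offset) == 0  <=>  x == -Offset.
      assert(((X + Offset) == 0) == (X == -Offset));
      return 0;
    }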
+ if (LU.Kind == LSRUse::ICmpZero) { + ICmpInst *CI = cast<ICmpInst>(LF.UserInst); + DeadInsts.push_back(CI->getOperand(1)); + assert(!F.AM.BaseGV && "ICmp does not support folding a global value and " + "a scale at the same time!"); + if (F.AM.Scale == -1) { + if (ICmpScaledV->getType() != OpTy) { + Instruction *Cast = + CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, + OpTy, false), + ICmpScaledV, OpTy, "tmp", CI); + ICmpScaledV = Cast; + } + CI->setOperand(1, ICmpScaledV); + } else { + assert(F.AM.Scale == 0 && + "ICmp does not support folding a global value and " + "a scale at the same time!"); + Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), + -(uint64_t)Offset); + if (C->getType() != OpTy) + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + OpTy, false), + C, OpTy); + + CI->setOperand(1, C); + } + } + + return FullV; } -/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding -/// when to exit the loop is used only for that purpose, try to rearrange things -/// so it counts down to a test against zero. -bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { - bool ThisChanged = false; - for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { - const SCEV *Stride = IU->StrideOrder[i]; - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI = - IU->IVUsesByStride.find(Stride); - assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SI->first->isLoopInvariant(L)) - continue; - // If stride is a constant and it has an icmpinst use, check if we can - // optimize the loop to count down. - if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) { - Instruction *User = SI->second->Users.begin()->getUser(); - if (!isa<ICmpInst>(User)) - continue; - const SCEV *CondStride = Stride; - IVStrideUse *Use = &*SI->second->Users.begin(); - if (!OptimizeLoopCountIVOfStride(CondStride, Use, L)) - continue; - ThisChanged = true; +/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use +/// of their operands effectively happens in their predecessor blocks, so the +/// expression may need to be expanded in multiple places. +void LSRInstance::RewriteForPHI(PHINode *PN, + const LSRFixup &LF, + const Formula &F, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts, + Pass *P) const { + DenseMap<BasicBlock *, Value *> Inserted; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == LF.OperandValToReplace) { + BasicBlock *BB = PN->getIncomingBlock(i); + + // If this is a critical edge, split the edge so that we do not insert + // the code on all predecessor/successor paths. We do this unless this + // is the canonical backedge for this loop, which complicates post-inc + // users. + if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && + !isa<IndirectBrInst>(BB->getTerminator()) && + (PN->getParent() != L->getHeader() || !L->contains(BB))) { + // Split the critical edge. + BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P); + + // If PN is outside of the loop and BB is in the loop, we want to + // move the block to be immediately before the PHI block, not + // immediately after BB. + if (L->contains(BB) && !L->contains(PN)) + NewBB->moveBefore(PN->getParent()); + + // Splitting the edge can reduce the number of PHI entries we have. 
+ e = PN->getNumIncomingValues(); + BB = NewBB; + i = PN->getBasicBlockIndex(BB); + } - // Now check if it's possible to reuse this iv for other stride uses. - for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) { - const SCEV *SStride = IU->StrideOrder[j]; - if (SStride == CondStride) - continue; - std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII = - IU->IVUsesByStride.find(SStride); - assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!"); - // FIXME: Generalize to non-affine IV's. - if (!SII->first->isLoopInvariant(L)) - continue; - // FIXME: Rewrite other stride using CondStride. + std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair = + Inserted.insert(std::make_pair(BB, static_cast<Value *>(0))); + if (!Pair.second) + PN->setIncomingValue(i, Pair.first->second); + else { + Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts); + + // If this is reuse-by-noop-cast, insert the noop cast. + const Type *OpTy = LF.OperandValToReplace->getType(); + if (FullV->getType() != OpTy) + FullV = + CastInst::Create(CastInst::getCastOpcode(FullV, false, + OpTy, false), + FullV, LF.OperandValToReplace->getType(), + "tmp", BB->getTerminator()); + + PN->setIncomingValue(i, FullV); + Pair.first->second = FullV; } } +} + +/// Rewrite - Emit instructions for the leading candidate expression for this +/// LSRUse (this is called "expanding"), and update the UserInst to reference +/// the newly expanded value. +void LSRInstance::Rewrite(const LSRFixup &LF, + const Formula &F, + SCEVExpander &Rewriter, + SmallVectorImpl<WeakVH> &DeadInsts, + Pass *P) const { + // First, find an insertion point that dominates UserInst. For PHI nodes, + // find the nearest block which dominates all the relevant uses. + if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { + RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P); + } else { + Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts); + + // If this is reuse-by-noop-cast, insert the noop cast. + const Type *OpTy = LF.OperandValToReplace->getType(); + if (FullV->getType() != OpTy) { + Instruction *Cast = + CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), + FullV, OpTy, "tmp", LF.UserInst); + FullV = Cast; + } + + // Update the user. ICmpZero is handled specially here (for now) because + // Expand may have updated one of the operands of the icmp already, and + // its new value may happen to be equal to LF.OperandValToReplace, in + // which case doing replaceUsesOfWith leads to replacing both operands + // with the same value. TODO: Reorganize this. + if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero) + LF.UserInst->setOperand(0, FullV); + else + LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV); } - Changed |= ThisChanged; - return ThisChanged; + DeadInsts.push_back(LF.OperandValToReplace); } -bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) { - IU = &getAnalysis<IVUsers>(); - SE = &getAnalysis<ScalarEvolution>(); - Changed = false; +void +LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, + Pass *P) { + // Keep track of instructions we may have made dead, so that + // we can remove them after we are done working. + SmallVector<WeakVH, 16> DeadInsts; + + SCEVExpander Rewriter(SE); + Rewriter.disableCanonicalMode(); + Rewriter.setIVIncInsertPos(L, IVIncInsertPos); - // If LoopSimplify form is not available, stay out of trouble. 
- if (!L->getLoopPreheader() || !L->getLoopLatch()) - return false; + // Expand the new value definitions and update the users. + for (size_t i = 0, e = Fixups.size(); i != e; ++i) { + size_t LUIdx = Fixups[i].LUIdx; + + Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P); + + Changed = true; + } - if (!IU->IVUsesByStride.empty()) { - DEBUG(dbgs() << "\nLSR on \"" << L->getHeader()->getParent()->getName() - << "\" "; - L->print(dbgs())); + // Clean up after ourselves. This must be done before deleting any + // instructions. + Rewriter.clear(); - // Sort the StrideOrder so we process larger strides first. - std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(), - StrideCompare(SE)); + Changed |= DeleteTriviallyDeadInstructions(DeadInsts); +} - // Optimize induction variables. Some indvar uses can be transformed to use - // strides that will be needed for other purposes. A common example of this - // is the exit test for the loop, which can often be rewritten to use the - // computation of some other indvar to decide when to terminate the loop. - OptimizeIndvars(L); +LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) + : IU(P->getAnalysis<IVUsers>()), + SE(P->getAnalysis<ScalarEvolution>()), + DT(P->getAnalysis<DominatorTree>()), + TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { - // Change loop terminating condition to use the postinc iv when possible - // and optimize loop terminating compare. FIXME: Move this after - // StrengthReduceIVUsersOfStride? - OptimizeLoopTermCond(L); + // If LoopSimplify form is not available, stay out of trouble. + if (!L->isLoopSimplifyForm()) return; + + // If there's no interesting work to be done, bail early. + if (IU.empty()) return; + + DEBUG(dbgs() << "\nLSR on loop "; + WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); + dbgs() << ":\n"); + + /// OptimizeShadowIV - If IV is used in a int-to-float cast + /// inside the loop then try to eliminate the cast operation. + OptimizeShadowIV(); + + // Change loop terminating condition to use the postinc iv when possible. + Changed |= OptimizeLoopTermCond(); + + CollectInterestingTypesAndFactors(); + CollectFixupsAndInitialFormulae(); + CollectLoopInvariantFixupsAndFormulae(); + + DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n"; + print_uses(dbgs())); + + // Now use the reuse data to generate a bunch of interesting ways + // to formulate the values needed for the uses. + GenerateAllReuseFormulae(); + + DEBUG(dbgs() << "\n" + "After generating reuse formulae:\n"; + print_uses(dbgs())); + + FilterOutUndesirableDedicatedRegisters(); + NarrowSearchSpaceUsingHeuristics(); + + SmallVector<const Formula *, 8> Solution; + Solve(Solution); + assert(Solution.size() == Uses.size() && "Malformed solution!"); + + // Release memory that is no longer needed. + Factors.clear(); + Types.clear(); + RegUses.clear(); + +#ifndef NDEBUG + // Formulae should be legal. + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + const LSRUse &LU = *I; + for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), + JE = LU.Formulae.end(); J != JE; ++J) + assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, TLI) && + "Illegal formula generated!"); + }; +#endif - // FIXME: We can shrink overlarge IV's here. e.g. if the code has - // computation in i64 values and the target doesn't support i64, demote - // the computation to 32-bit if safe. + // Now that we've decided what we want, make it so. 
+ ImplementSolution(Solution, P); +} - // FIXME: Attempt to reuse values across multiple IV's. In particular, we - // could have something like "for(i) { foo(i*8); bar(i*16) }", which should - // be codegened as "for (j = 0;; j+=8) { foo(j); bar(j+j); }" on X86/PPC. - // Need to be careful that IV's are all the same type. Only works for - // intptr_t indvars. +void LSRInstance::print_factors_and_types(raw_ostream &OS) const { + if (Factors.empty() && Types.empty()) return; - // IVsByStride keeps IVs for one particular loop. - assert(IVsByStride.empty() && "Stale entries in IVsByStride?"); + OS << "LSR has identified the following interesting factors and types: "; + bool First = true; - StrengthReduceIVUsers(L); + for (SmallSetVector<int64_t, 8>::const_iterator + I = Factors.begin(), E = Factors.end(); I != E; ++I) { + if (!First) OS << ", "; + First = false; + OS << '*' << *I; + } - // After all sharing is done, see if we can adjust the loop to test against - // zero instead of counting up to a maximum. This is usually faster. - OptimizeLoopCountIV(L); + for (SmallSetVector<const Type *, 4>::const_iterator + I = Types.begin(), E = Types.end(); I != E; ++I) { + if (!First) OS << ", "; + First = false; + OS << '(' << **I << ')'; + } + OS << '\n'; +} - // We're done analyzing this loop; release all the state we built up for it. - IVsByStride.clear(); +void LSRInstance::print_fixups(raw_ostream &OS) const { + OS << "LSR is examining the following fixup sites:\n"; + for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + const LSRFixup &LF = *I; + dbgs() << " "; + LF.print(OS); + OS << '\n'; + } +} - // Clean up after ourselves - DeleteTriviallyDeadInstructions(); +void LSRInstance::print_uses(raw_ostream &OS) const { + OS << "LSR is examining the following uses:\n"; + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + const LSRUse &LU = *I; + dbgs() << " "; + LU.print(OS); + OS << '\n'; + for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), + JE = LU.Formulae.end(); J != JE; ++J) { + OS << " "; + J->print(OS); + OS << '\n'; + } } +} + +void LSRInstance::print(raw_ostream &OS) const { + print_factors_and_types(OS); + print_fixups(OS); + print_uses(OS); +} + +void LSRInstance::dump() const { + print(errs()); errs() << '\n'; +} + +namespace { + +class LoopStrengthReduce : public LoopPass { + /// TLI - Keep a pointer of a TargetLowering to consult for determining + /// transformation profitability. + const TargetLowering *const TLI; + +public: + static char ID; // Pass ID, replacement for typeid + explicit LoopStrengthReduce(const TargetLowering *tli = 0); + +private: + bool runOnLoop(Loop *L, LPPassManager &LPM); + void getAnalysisUsage(AnalysisUsage &AU) const; +}; + +} + +char LoopStrengthReduce::ID = 0; +static RegisterPass<LoopStrengthReduce> +X("loop-reduce", "Loop Strength Reduction"); + +Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { + return new LoopStrengthReduce(TLI); +} + +LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) + : LoopPass(&ID), TLI(tli) {} + +void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { + // We split critical edges, so we change the CFG. However, we do update + // many analyses if they are around. 
+ AU.addPreservedID(LoopSimplifyID); + AU.addPreserved<LoopInfo>(); + AU.addPreserved("domfrontier"); + + AU.addRequiredID(LoopSimplifyID); + AU.addRequired<DominatorTree>(); + AU.addPreserved<DominatorTree>(); + AU.addRequired<ScalarEvolution>(); + AU.addPreserved<ScalarEvolution>(); + AU.addRequired<IVUsers>(); + AU.addPreserved<IVUsers>(); +} + +bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { + bool Changed = false; + + // Run the main LSR transformation. + Changed |= LSRInstance(TLI, L, this).getChanged(); // At this point, it is worth checking to see if any recurrence PHIs are also // dead, so that we can remove them as well. diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index e5fba28..071e9b7 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -170,7 +170,7 @@ Pass *llvm::createLoopUnswitchPass(bool Os) { /// Otherwise, return null. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { // We can never unswitch on vector conditions. - if (isa<VectorType>(Cond->getType())) + if (Cond->getType()->isVectorTy()) return 0; // Constants should be folded, not unswitched on! @@ -871,7 +871,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC // in the loop with the appropriate one directly. if (IsEqual || (isa<ConstantInt>(Val) && - Val->getType()->isInteger(1))) { + Val->getType()->isIntegerTy(1))) { Value *Replacement; if (IsEqual) Replacement = Val; @@ -997,10 +997,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::And: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType()->isInteger(1)) + I->getOperand(0)->getType()->isIntegerTy(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isInteger(1)) { + if (CB->getType()->isIntegerTy(1)) { if (CB->isOne()) // X & 1 -> X ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM); else // X & 0 -> 0 @@ -1011,10 +1011,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) { case Instruction::Or: if (isa<ConstantInt>(I->getOperand(0)) && // constant -> RHS - I->getOperand(0)->getType()->isInteger(1)) + I->getOperand(0)->getType()->isIntegerTy(1)) cast<BinaryOperator>(I)->swapOperands(); if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) - if (CB->getType()->isInteger(1)) { + if (CB->getType()->isIntegerTy(1)) { if (CB->isOne()) // X | 1 -> 1 ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM); else // X | 0 -> X diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index e0aa491..62e2977 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -42,7 +42,7 @@ static Value *isBytewiseValue(Value *V) { LLVMContext &Context = V->getContext(); // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType()->isInteger(8)) return V; + if (V->getType()->isIntegerTy(8)) return V; // Constant float and double values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. 
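The dominant mechanical change in the hunks above and below is an API migration: explicit isa<PointerType>/isa<IntegerType>/isa<StructType>/isa<VectorType> tests on a value's type, and the older isInteger(N)/isFloatingPoint() helpers, are replaced by the Type predicate methods isPointerTy(), isIntegerTy(N), isStructTy(), isVectorTy(), and isFloatingPointTy(). A minimal sketch of the idiom the call sites are converted to, assuming the LLVM headers of this tree (the helper function names here are hypothetical):

    // Sketch only: the predicate style this patch converts call sites to.
    // Old and new forms are equivalent; the new one avoids the cast machinery.
    #include "llvm/Type.h"
    #include "llvm/Value.h"
    using namespace llvm;

    static bool isByteSizedInteger(const Value *V) {
      const Type *T = V->getType();
      // Old style: T->isInteger(8)
      // New style:
      return T->isIntegerTy(8);
    }

    static bool isAggregateLike(const Value *V) {
      const Type *T = V->getType();
      // Old style: isa<StructType>(T) || isa<VectorType>(T)
      // New style:
      return T->isStructTy() || T->isVectorTy();
    }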
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index bbd4b45..5aca9cd 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -182,7 +182,7 @@ unsigned Reassociate::getRank(Value *V) { // If this is a not or neg instruction, do not count it for rank. This // assures us that X and ~X will have the same rank. - if (!I->getType()->isInteger() || + if (!I->getType()->isIntegerTy() || (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I))) ++Rank; @@ -597,19 +597,35 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { /// FindSingleUseMultiplyFactors - If V is a single-use multiply, recursively /// add its operands as factors, otherwise add V to the list of factors. +/// +/// Ops is the top-level list of add operands we're trying to factor. static void FindSingleUseMultiplyFactors(Value *V, - SmallVectorImpl<Value*> &Factors) { + SmallVectorImpl<Value*> &Factors, + const SmallVectorImpl<ValueEntry> &Ops, + bool IsRoot) { BinaryOperator *BO; - if ((!V->hasOneUse() && !V->use_empty()) || + if (!(V->hasOneUse() || V->use_empty()) || // More than one use. !(BO = dyn_cast<BinaryOperator>(V)) || BO->getOpcode() != Instruction::Mul) { Factors.push_back(V); return; } + // If this value has a single use because it is another input to the add + // tree we're reassociating and we dropped its use, it actually has two + // uses and we can't factor it. + if (!IsRoot) { + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (Ops[i].Op == V) { + Factors.push_back(V); + return; + } + } + + // Otherwise, add the LHS and RHS to the list of factors. - FindSingleUseMultiplyFactors(BO->getOperand(1), Factors); - FindSingleUseMultiplyFactors(BO->getOperand(0), Factors); + FindSingleUseMultiplyFactors(BO->getOperand(1), Factors, Ops, false); + FindSingleUseMultiplyFactors(BO->getOperand(0), Factors, Ops, false); } /// OptimizeAndOrXor - Optimize a series of operands to an 'and', 'or', or 'xor' @@ -753,7 +769,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Compute all of the factors of this added value. SmallVector<Value*, 8> Factors; - FindSingleUseMultiplyFactors(BOp, Factors); + FindSingleUseMultiplyFactors(BOp, Factors, Ops, true); assert(Factors.size() > 1 && "Bad linearize!"); // Add one to FactorOccurrences for each unique factor in this op. @@ -929,8 +945,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { } // Reject cases where it is pointless to do this. - if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPoint() || - isa<VectorType>(BI->getType())) + if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() || + BI->getType()->isVectorTy()) continue; // Floating point ops are not associative. // Do not reassociate boolean (i1) expressions. We want to preserve the @@ -939,7 +955,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) { // is not further optimized, it is likely to be transformed back to a // short-circuited form for code gen, and the source order may have been // optimized for the most likely conditions. 
- if (BI->getType()->isInteger(1)) + if (BI->getType()->isIntegerTy(1)) continue; // If this is a subtract instruction which is not already in negate form, diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 02b45a1..7e37938 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -295,7 +295,7 @@ public: } void markOverdefined(Value *V) { - assert(!isa<StructType>(V->getType()) && "Should use other method"); + assert(!V->getType()->isStructTy() && "Should use other method"); markOverdefined(ValueState[V], V); } @@ -321,12 +321,12 @@ private: } void markConstant(Value *V, Constant *C) { - assert(!isa<StructType>(V->getType()) && "Should use other method"); + assert(!V->getType()->isStructTy() && "Should use other method"); markConstant(ValueState[V], V, C); } void markForcedConstant(Value *V, Constant *C) { - assert(!isa<StructType>(V->getType()) && "Should use other method"); + assert(!V->getType()->isStructTy() && "Should use other method"); ValueState[V].markForcedConstant(C); DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n'); InstWorkList.push_back(V); @@ -360,7 +360,7 @@ private: } void mergeInValue(Value *V, LatticeVal MergeWithV) { - assert(!isa<StructType>(V->getType()) && "Should use other method"); + assert(!V->getType()->isStructTy() && "Should use other method"); mergeInValue(ValueState[V], V, MergeWithV); } @@ -369,7 +369,7 @@ private: /// value. This function handles the case when the value hasn't been seen yet /// by properly seeding constants etc. LatticeVal &getValueState(Value *V) { - assert(!isa<StructType>(V->getType()) && "Should use getStructValueState"); + assert(!V->getType()->isStructTy() && "Should use getStructValueState"); std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I = ValueState.insert(std::make_pair(V, LatticeVal())); @@ -392,7 +392,7 @@ private: /// value/field pair. This function handles the case when the value hasn't /// been seen yet by properly seeding constants etc. LatticeVal &getStructValueState(Value *V, unsigned i) { - assert(isa<StructType>(V->getType()) && "Should use getValueState"); + assert(V->getType()->isStructTy() && "Should use getValueState"); assert(i < cast<StructType>(V->getType())->getNumElements() && "Invalid element #"); @@ -666,7 +666,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { void SCCPSolver::visitPHINode(PHINode &PN) { // If this PN returns a struct, just mark the result overdefined. // TODO: We could do a lot better than this if code actually uses this. - if (isa<StructType>(PN.getType())) + if (PN.getType()->isStructTy()) return markAnythingOverdefined(&PN); if (getValueState(&PN).isOverdefined()) { @@ -742,7 +742,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) { Value *ResultOp = I.getOperand(0); // If we are tracking the return value of this function, merge it in. - if (!TrackedRetVals.empty() && !isa<StructType>(ResultOp->getType())) { + if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) { DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F); if (TFRVI != TrackedRetVals.end()) { @@ -787,7 +787,7 @@ void SCCPSolver::visitCastInst(CastInst &I) { void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { // If this returns a struct, mark all elements over defined, we don't track // structs in structs. 
- if (isa<StructType>(EVI.getType())) + if (EVI.getType()->isStructTy()) return markAnythingOverdefined(&EVI); // If this is extracting from more than one level of struct, we don't know. @@ -795,7 +795,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { return markOverdefined(&EVI); Value *AggVal = EVI.getAggregateOperand(); - if (isa<StructType>(AggVal->getType())) { + if (AggVal->getType()->isStructTy()) { unsigned i = *EVI.idx_begin(); LatticeVal EltVal = getStructValueState(AggVal, i); mergeInValue(getValueState(&EVI), &EVI, EltVal); @@ -828,7 +828,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { } Value *Val = IVI.getInsertedValueOperand(); - if (isa<StructType>(Val->getType())) + if (Val->getType()->isStructTy()) // We don't track structs in structs. markOverdefined(getStructValueState(&IVI, i), &IVI); else { @@ -841,7 +841,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { void SCCPSolver::visitSelectInst(SelectInst &I) { // If this select returns a struct, just mark the result overdefined. // TODO: We could do a lot better than this if code actually uses this. - if (isa<StructType>(I.getType())) + if (I.getType()->isStructTy()) return markAnythingOverdefined(&I); LatticeVal CondValue = getValueState(I.getCondition()); @@ -1166,7 +1166,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { void SCCPSolver::visitStoreInst(StoreInst &SI) { // If this store is of a struct, ignore it. - if (isa<StructType>(SI.getOperand(0)->getType())) + if (SI.getOperand(0)->getType()->isStructTy()) return; if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1))) @@ -1187,7 +1187,7 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) { // global, we can replace the load with the loaded constant value! void SCCPSolver::visitLoadInst(LoadInst &I) { // If this load is of a struct, just mark the result overdefined. - if (isa<StructType>(I.getType())) + if (I.getType()->isStructTy()) return markAnythingOverdefined(&I); LatticeVal PtrVal = getValueState(I.getOperand(0)); @@ -1241,7 +1241,7 @@ CallOverdefined: // Otherwise, if we have a single return value case, and if the function is // a declaration, maybe we can constant fold it. - if (F && F->isDeclaration() && !isa<StructType>(I->getType()) && + if (F && F->isDeclaration() && !I->getType()->isStructTy() && canConstantFoldCallTo(F)) { SmallVector<Constant*, 8> Operands; @@ -1352,7 +1352,7 @@ void SCCPSolver::Solve() { // since all of its users will have already been marked as overdefined. // Update all of the users of this instruction's value. // - if (isa<StructType>(I->getType()) || !getValueState(I).isOverdefined()) + if (I->getType()->isStructTy() || !getValueState(I).isOverdefined()) for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) if (Instruction *I = dyn_cast<Instruction>(*UI)) @@ -1418,7 +1418,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (!LV.isUndefined()) continue; // No instructions using structs need disambiguation. - if (isa<StructType>(I->getOperand(0)->getType())) + if (I->getOperand(0)->getType()->isStructTy()) continue; // Get the lattice values of the first two operands for use below. @@ -1426,7 +1426,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { LatticeVal Op1LV; if (I->getNumOperands() == 2) { // No instructions using structs need disambiguation. 
- if (isa<StructType>(I->getOperand(1)->getType())) + if (I->getOperand(1)->getType()->isStructTy()) continue; // If this is a two-operand instruction, and if both operands are @@ -1656,7 +1656,7 @@ bool SCCP::runOnFunction(Function &F) { continue; // TODO: Reconstruct structs from their elements. - if (isa<StructType>(Inst->getType())) + if (Inst->getType()->isStructTy()) continue; LatticeVal IV = Solver.getLatticeValueFor(Inst); @@ -1792,7 +1792,7 @@ bool IPSCCP::runOnModule(Module &M) { if (Solver.isBlockExecutable(F->begin())) { for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - if (AI->use_empty() || isa<StructType>(AI->getType())) continue; + if (AI->use_empty() || AI->getType()->isStructTy()) continue; // TODO: Could use getStructLatticeValueFor to find out if the entire // result is a constant and replace it entirely if so. @@ -1835,7 +1835,7 @@ bool IPSCCP::runOnModule(Module &M) { for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { Instruction *Inst = BI++; - if (Inst->getType()->isVoidTy() || isa<StructType>(Inst->getType())) + if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy()) continue; // TODO: Could use getStructLatticeValueFor to find out if the entire @@ -1918,6 +1918,14 @@ bool IPSCCP::runOnModule(Module &M) { // all call uses with the inferred value. This means we don't need to bother // actually returning anything from the function. Replace all return // instructions with return undef. + // + // Do this in two stages: first identify the functions we should process, then + // actually zap their returns. This is important because we can only do this + // if the address of the function isn't taken. In cases where a return is the + // last use of a function, the order of processing functions would affect + // whether other functions are optimizable. + SmallVector<ReturnInst*, 8> ReturnsToZap; + // TODO: Process multiple value ret instructions also. const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals(); for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(), @@ -1933,7 +1941,13 @@ bool IPSCCP::runOnModule(Module &M) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) if (!isa<UndefValue>(RI->getOperand(0))) - RI->setOperand(0, UndefValue::get(F->getReturnType())); + ReturnsToZap.push_back(RI); + } + + // Zap all returns which we've identified as zap to change. + for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) { + Function *F = ReturnsToZap[i]->getParent()->getParent(); + ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); } // If we infered constant or undef values for globals variables, we can delete diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 900d119..bbe6270 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -302,7 +302,7 @@ bool SROA::performScalarRepl(Function &F) { // random stuff that doesn't use vectors (e.g. <9 x double>) because then // we just get a lot of insert/extracts. If at least one vector is // involved, then we probably really do have a union of vector/array. 
- if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) { + if (VectorTy && VectorTy->isVectorTy() && HadAVector) { DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = " << *VectorTy << '\n'); @@ -449,7 +449,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, // into. for (; GEPIt != E; ++GEPIt) { // Ignore struct elements, no extra checking needed for these. - if (isa<StructType>(*GEPIt)) + if ((*GEPIt)->isStructTy()) continue; ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); @@ -480,7 +480,7 @@ void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize, // (which are essentially the same as the MemIntrinsics, especially with // regard to copying padding between elements), or references using the // aggregate type of the alloca. - if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) { + if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) { if (!UsesAggregateType) { if (isStore) Info.isMemCpyDst = true; @@ -565,7 +565,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, } LI->replaceAllUsesWith(Insert); DeadInsts.push_back(LI); - } else if (isa<IntegerType>(LIType) && + } else if (LIType->isIntegerTy() && TD->getTypeAllocSize(LIType) == TD->getTypeAllocSize(AI->getAllocatedType())) { // If this is a load of the entire alloca to an integer, rewrite it. @@ -588,7 +588,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, new StoreInst(Extract, NewElts[i], SI); } DeadInsts.push_back(SI); - } else if (isa<IntegerType>(SIType) && + } else if (SIType->isIntegerTy() && TD->getTypeAllocSize(SIType) == TD->getTypeAllocSize(AI->getAllocatedType())) { // If this is a store of the entire alloca from an integer, rewrite it. @@ -833,9 +833,9 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // Convert the integer value to the appropriate type. StoreVal = ConstantInt::get(Context, TotalVal); - if (isa<PointerType>(ValTy)) + if (ValTy->isPointerTy()) StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); - else if (ValTy->isFloatingPoint()) + else if (ValTy->isFloatingPointTy()) StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); assert(StoreVal->getType() == ValTy && "Type mismatch!"); @@ -939,7 +939,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *DestField = NewElts[i]; if (EltVal->getType() == FieldTy) { // Storing to an integer field of this size, just do it. - } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) { + } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). EltVal = new BitCastInst(EltVal, FieldTy, "", SI); } else { @@ -983,7 +983,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, Value *DestField = NewElts[i]; if (EltVal->getType() == ArrayEltTy) { // Storing to an integer field of this size, just do it. - } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) { + } else if (ArrayEltTy->isFloatingPointTy() || + ArrayEltTy->isVectorTy()) { // Bitcast to the right element type (for fp/vector values). 
EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); } else { @@ -1043,8 +1044,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), FieldSizeBits); - if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && - !isa<VectorType>(FieldTy)) + if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() && + !FieldTy->isVectorTy()) SrcField = new BitCastInst(SrcField, PointerType::getUnqual(FieldIntTy), "", LI); @@ -1182,7 +1183,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, return; } } else if (In->isFloatTy() || In->isDoubleTy() || - (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 && + (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is @@ -1226,7 +1227,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, return false; MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, V->getContext()); - SawVec |= isa<VectorType>(LI->getType()); + SawVec |= LI->getType()->isVectorTy(); continue; } @@ -1235,7 +1236,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, if (SI->getOperand(0) == V || SI->isVolatile()) return 0; MergeInType(SI->getOperand(0)->getType(), Offset, VecTy, AllocaSize, *TD, V->getContext()); - SawVec |= isa<VectorType>(SI->getOperand(0)->getType()); + SawVec |= SI->getOperand(0)->getType()->isVectorTy(); continue; } @@ -1437,7 +1438,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) { - if (isa<VectorType>(ToType)) + if (ToType->isVectorTy()) return Builder.CreateBitCast(FromVal, ToType, "tmp"); // Otherwise it must be an element access. @@ -1520,9 +1521,9 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, LIBitWidth), "tmp"); // If the result is an integer, this is a trunc or bitcast. - if (isa<IntegerType>(ToType)) { + if (ToType->isIntegerTy()) { // Should be done. - } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) { + } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) { // Just do a bitcast, we know the sizes match up. FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp"); } else { @@ -1600,10 +1601,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old, unsigned DestWidth = TD->getTypeSizeInBits(AllocaType); unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType()); unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType); - if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType())) + if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy()) SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth), "tmp"); - else if (isa<PointerType>(SV->getType())) + else if (SV->getType()->isPointerTy()) SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp"); // Zero extend or truncate the value if needed. 
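For orientation, the ScalarReplAggregates hunks above only modernize type tests inside an existing transformation; the rewrite logic itself is unchanged. A source-level picture of what scalar replacement of aggregates does (hypothetical functions, not code from this patch): an aggregate whose fields are only accessed individually is split into independent scalars, which is what the per-element NewElts loads and stores above materialize at the IR level.

    // Hypothetical before/after view of scalar replacement of an aggregate.
    struct Pair { int a; int b; };

    int sum_before(int x) {
      Pair p;            // a single aggregate alloca
      p.a = x;
      p.b = x + 1;
      return p.a + p.b;  // fields are only ever accessed individually
    }

    int sum_after(int x) {
      int p_a = x;       // the alloca is replaced by independent scalars,
      int p_b = x + 1;   // which later passes can promote to registers
      return p_a + p_b;
    }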
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 4216e8f..05027ae 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -17,6 +17,7 @@ #define DEBUG_TYPE "simplify-libcalls" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Intrinsics.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" @@ -67,496 +68,14 @@ public: Context = &CI->getCalledFunction()->getContext(); return CallOptimizer(CI->getCalledFunction(), CI, B); } - - /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. - Value *CastToCStr(Value *V, IRBuilder<> &B); - - /// EmitStrLen - Emit a call to the strlen function to the builder, for the - /// specified pointer. Ptr is required to be some pointer type, and the - /// return value has 'intptr_t' type. - Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); - - /// EmitStrChr - Emit a call to the strchr function to the builder, for the - /// specified pointer and character. Ptr is required to be some pointer type, - /// and the return value has 'i8*' type. - Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B); - - /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the - /// specified pointer arguments. - Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B); - - /// EmitMemCpy - Emit a call to the memcpy function to the builder. This - /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B); - - /// EmitMemMove - Emit a call to the memmove function to the builder. This - /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. - Value *EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B); - - /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is - /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. - Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B); - - /// EmitMemCmp - Emit a call to the memcmp function. - Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B); - - /// EmitMemSet - Emit a call to the memset function - Value *EmitMemSet(Value *Dst, Value *Val, Value *Len, IRBuilder<> &B); - - /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' - /// (e.g. 'floor'). This function is known to take a single of type matching - /// 'Op' and returns one value with the same type. If 'Op' is a long double, - /// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f' - /// suffix. - Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, - const AttrListPtr &Attrs); - - /// EmitPutChar - Emit a call to the putchar function. This assumes that Char - /// is an integer. - Value *EmitPutChar(Value *Char, IRBuilder<> &B); - - /// EmitPutS - Emit a call to the puts function. This assumes that Str is - /// some pointer. - void EmitPutS(Value *Str, IRBuilder<> &B); - - /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is - /// an i32, and File is a pointer to FILE. - void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B); - - /// EmitFPutS - Emit a call to the puts function. Str is required to be a - /// pointer and File is a pointer to FILE. - void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B); - - /// EmitFWrite - Emit a call to the fwrite function. 
This assumes that Ptr is - /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. - void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B); - }; } // End anonymous namespace. -/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. -Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) { - return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr"); -} - -/// EmitStrLen - Emit a call to the strlen function to the builder, for the -/// specified pointer. This always returns an integer value of size intptr_t. -Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); - - Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), - TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - NULL); - CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); - if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitStrChr - Emit a call to the strchr function to the builder, for the -/// specified pointer and character. Ptr is required to be some pointer type, -/// and the return value has 'i8*' type. -Value *LibCallOptimization::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI = - AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - - const Type *I8Ptr = Type::getInt8PtrTy(*Context); - const Type *I32Ty = Type::getInt32Ty(*Context); - Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), - I8Ptr, I8Ptr, I32Ty, NULL); - CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), - ConstantInt::get(I32Ty, C), "strchr"); - if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the -/// specified pointer arguments. -Value *LibCallOptimization::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - const Type *I8Ptr = Type::getInt8PtrTy(*Context); - Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2), - I8Ptr, I8Ptr, I8Ptr, NULL); - CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), - "strcpy"); - if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always -/// expects that the size has type 'intptr_t' and Dst/Src are pointers. -Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B) { - Module *M = Caller->getParent(); - const Type *Ty = Len->getType(); - Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); - Dst = CastToCStr(Dst, B); - Src = CastToCStr(Src, B); - return B.CreateCall4(MemCpy, Dst, Src, Len, - ConstantInt::get(Type::getInt32Ty(*Context), Align)); -} - -/// EmitMemMove - Emit a call to the memmove function to the builder. 
This -/// always expects that the size has type 'intptr_t' and Dst/Src are pointers. -Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len, - unsigned Align, IRBuilder<> &B) { - Module *M = Caller->getParent(); - const Type *Ty = TD->getIntPtrType(*Context); - Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1); - Dst = CastToCStr(Dst, B); - Src = CastToCStr(Src, B); - Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align); - return B.CreateCall4(MemMove, Dst, Src, Len, A); -} - -/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is -/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. -Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI; - AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); - - Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), - Type::getInt32Ty(*Context), - TD->getIntPtrType(*Context), - NULL); - CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); - - if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitMemCmp - Emit a call to the memcmp function. -Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | - Attribute::NoUnwind); - - Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), NULL); - CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), - Len, "memcmp"); - - if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitMemSet - Emit a call to the memset function -Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val, - Value *Len, IRBuilder<> &B) { - Module *M = Caller->getParent(); - Intrinsic::ID IID = Intrinsic::memset; - const Type *Tys[1]; - Tys[0] = Len->getType(); - Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1); - return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); -} - -/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. -/// 'floor'). This function is known to take a single of type matching 'Op' and -/// returns one value with the same type. If 'Op' is a long double, 'l' is -/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. -Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, - IRBuilder<> &B, - const AttrListPtr &Attrs) { - char NameBuffer[20]; - if (!Op->getType()->isDoubleTy()) { - // If we need to add a suffix, copy into NameBuffer. 
- unsigned NameLen = strlen(Name); - assert(NameLen < sizeof(NameBuffer)-2); - memcpy(NameBuffer, Name, NameLen); - if (Op->getType()->isFloatTy()) - NameBuffer[NameLen] = 'f'; // floorf - else - NameBuffer[NameLen] = 'l'; // floorl - NameBuffer[NameLen+1] = 0; - Name = NameBuffer; - } - - Module *M = Caller->getParent(); - Value *Callee = M->getOrInsertFunction(Name, Op->getType(), - Op->getType(), NULL); - CallInst *CI = B.CreateCall(Callee, Op, Name); - CI->setAttributes(Attrs); - if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - -/// EmitPutChar - Emit a call to the putchar function. This assumes that Char -/// is an integer. -Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) { - Module *M = Caller->getParent(); - Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), NULL); - CallInst *CI = B.CreateCall(PutChar, - B.CreateIntCast(Char, - Type::getInt32Ty(*Context), - /*isSigned*/true, - "chari"), - "putchar"); - - if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - return CI; -} - -/// EmitPutS - Emit a call to the puts function. This assumes that Str is -/// some pointer. -void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - - Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - NULL); - CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); - if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - -} - -/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is -/// an integer and File is a pointer to FILE. -void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[2]; - AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (isa<PointerType>(File->getType())) - F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), - Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), File->getType(), - NULL); - else - F = M->getOrInsertFunction("fputc", - Type::getInt32Ty(*Context), - Type::getInt32Ty(*Context), - File->getType(), NULL); - Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true, - "chari"); - CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} - -/// EmitFPutS - Emit a call to the puts function. Str is required to be a -/// pointer and File is a pointer to FILE. 
-void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (isa<PointerType>(File->getType())) - F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), - Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - File->getType(), NULL); - else - F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context), - Type::getInt8PtrTy(*Context), - File->getType(), NULL); - CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} - -/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is -/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. -void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File, - IRBuilder<> &B) { - Module *M = Caller->getParent(); - AttributeWithIndex AWI[3]; - AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); - AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); - AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); - Constant *F; - if (isa<PointerType>(File->getType())) - F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), - TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), - File->getType(), NULL); - else - F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), - Type::getInt8PtrTy(*Context), - TD->getIntPtrType(*Context), - TD->getIntPtrType(*Context), - File->getType(), NULL); - CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, - ConstantInt::get(TD->getIntPtrType(*Context), 1), File); - - if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) - CI->setCallingConv(Fn->getCallingConv()); -} //===----------------------------------------------------------------------===// // Helper Functions //===----------------------------------------------------------------------===// -/// GetStringLengthH - If we can compute the length of the string pointed to by -/// the specified pointer, return 'len+1'. If we can't, return 0. -static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) { - // Look through noop bitcast instructions. - if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) - return GetStringLengthH(BCI->getOperand(0), PHIs); - - // If this is a PHI node, there are two cases: either we have already seen it - // or we haven't. - if (PHINode *PN = dyn_cast<PHINode>(V)) { - if (!PHIs.insert(PN)) - return ~0ULL; // already in the set. - - // If it was new, see if all the input strings are the same length. - uint64_t LenSoFar = ~0ULL; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs); - if (Len == 0) return 0; // Unknown length -> unknown. - - if (Len == ~0ULL) continue; - - if (Len != LenSoFar && LenSoFar != ~0ULL) - return 0; // Disagree -> unknown. - LenSoFar = Len; - } - - // Success, all agree. 
- return LenSoFar; - } - - // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y) - if (SelectInst *SI = dyn_cast<SelectInst>(V)) { - uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); - if (Len1 == 0) return 0; - uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs); - if (Len2 == 0) return 0; - if (Len1 == ~0ULL) return Len2; - if (Len2 == ~0ULL) return Len1; - if (Len1 != Len2) return 0; - return Len1; - } - - // If the value is not a GEP instruction nor a constant expression with a - // GEP instruction, then return unknown. - User *GEP = 0; - if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) { - GEP = GEPI; - } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { - if (CE->getOpcode() != Instruction::GetElementPtr) - return 0; - GEP = CE; - } else { - return 0; - } - - // Make sure the GEP has exactly three arguments. - if (GEP->getNumOperands() != 3) - return 0; - - // Check to make sure that the first operand of the GEP is an integer and - // has value 0 so that we are sure we're indexing into the initializer. - if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { - if (!Idx->isZero()) - return 0; - } else - return 0; - - // If the second index isn't a ConstantInt, then this is a variable index - // into the array. If this occurs, we can't say anything meaningful about - // the string. - uint64_t StartIdx = 0; - if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2))) - StartIdx = CI->getZExtValue(); - else - return 0; - - // The GEP instruction, constant or instruction, must reference a global - // variable that is a constant and is initialized. The referenced constant - // initializer is the array that we'll use for optimization. - GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); - if (!GV || !GV->isConstant() || !GV->hasInitializer() || - GV->mayBeOverridden()) - return 0; - Constant *GlobalInit = GV->getInitializer(); - - // Handle the ConstantAggregateZero case, which is a degenerate case. The - // initializer is constant zero so the length of the string must be zero. - if (isa<ConstantAggregateZero>(GlobalInit)) - return 1; // Len = 0 offset by 1. - - // Must be a Constant Array - ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); - if (!Array || !Array->getType()->getElementType()->isInteger(8)) - return false; - - // Get the number of elements in the array - uint64_t NumElts = Array->getType()->getNumElements(); - - // Traverse the constant array from StartIdx (derived above) which is - // the place the GEP refers to in the array. - for (unsigned i = StartIdx; i != NumElts; ++i) { - Constant *Elt = Array->getOperand(i); - ConstantInt *CI = dyn_cast<ConstantInt>(Elt); - if (!CI) // This array isn't suitable, non-int initializer. - return 0; - if (CI->isZero()) - return i-StartIdx+1; // We found end of string, success! - } - - return 0; // The array isn't null terminated, conservatively return 'unknown'. -} - -/// GetStringLength - If we can compute the length of the string pointed to by -/// the specified pointer, return 'len+1'. If we can't, return 0. -static uint64_t GetStringLength(Value *V) { - if (!isa<PointerType>(V->getType())) return 0; - - SmallPtrSet<PHINode*, 32> PHIs; - uint64_t Len = GetStringLengthH(V, PHIs); - // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return - // an empty string as a length. - return Len == ~0ULL ? 1 : Len; -} - /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the /// value is equal or not-equal to zero. 
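The large removed block above (CastToCStr, the Emit* helpers, and GetStringLengthH/GetStringLength) is not lost functionality: the #include of llvm/Transforms/Utils/BuildLibCalls.h added at the top of this file indicates the helpers now live there as free functions, and the call sites in the following hunks gain an explicit TargetData argument (for example EmitStrLen(Dst, B, TD) and EmitMemCpy(..., 1, B, TD)). A minimal sketch of a call site in the new style, with the signature inferred from those call sites rather than taken from the header:

    // Sketch only; the free-function signature is inferred from the updated
    // call sites below (an extra const TargetData* parameter at the end).
    #include "llvm/Transforms/Utils/BuildLibCalls.h"
    #include "llvm/Support/IRBuilder.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    static Value *emitLengthOf(Value *Str, IRBuilder<> &B, const TargetData *TD) {
      // Formerly a private LibCallOptimization::EmitStrLen(Str, B) method;
      // now a standalone helper that takes TargetData explicitly.
      return EmitStrLen(Str, B, TD);
    }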
static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { @@ -613,7 +132,7 @@ struct StrCatOpt : public LibCallOptimization { void EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) { // We need to find the end of the destination string. That's where the // memory is to be moved to. We just generate a call to strlen. - Value *DstLen = EmitStrLen(Dst, B); + Value *DstLen = EmitStrLen(Dst, B, TD); // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -623,7 +142,7 @@ struct StrCatOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. EmitMemCpy(CpyDst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B, TD); } }; @@ -638,7 +157,7 @@ struct StrNCatOpt : public StrCatOpt { FT->getReturnType() != Type::getInt8PtrTy(*Context) || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || - !isa<IntegerType>(FT->getParamType(2))) + !FT->getParamType(2)->isIntegerTy()) return 0; // Extract some information from the instruction @@ -697,11 +216,12 @@ struct StrChrOpt : public LibCallOptimization { if (!TD) return 0; uint64_t Len = GetStringLength(SrcStr); - if (Len == 0 || !FT->getParamType(1)->isInteger(32)) // memchr needs i32. + if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. - ConstantInt::get(TD->getIntPtrType(*Context), Len), B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), + B, TD); } // Otherwise, the character is a constant, see if the first argument is @@ -739,7 +259,7 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; @@ -772,7 +292,7 @@ struct StrCmpOpt : public LibCallOptimization { return EmitMemCmp(Str1P, Str2P, ConstantInt::get(TD->getIntPtrType(*Context), - std::min(Len1, Len2)), B); + std::min(Len1, Len2)), B, TD); } return 0; @@ -787,10 +307,10 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - !FT->getReturnType()->isInteger(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || - !isa<IntegerType>(FT->getParamType(2))) + !FT->getParamType(2)->isIntegerTy()) return 0; Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); @@ -852,7 +372,7 @@ struct StrCpyOpt : public LibCallOptimization { // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. 
EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); return Dst; } }; @@ -866,7 +386,7 @@ struct StrNCpyOpt : public LibCallOptimization { if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || - !isa<IntegerType>(FT->getParamType(2))) + !FT->getParamType(2)->isIntegerTy()) return 0; Value *Dst = CI->getOperand(1); @@ -881,7 +401,7 @@ struct StrNCpyOpt : public LibCallOptimization { if (SrcLen == 0) { // strncpy(x, "", y) -> memset(x, '\0', y, 1) EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, - B); + B, TD); return Dst; } @@ -901,7 +421,7 @@ struct StrNCpyOpt : public LibCallOptimization { // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] EmitMemCpy(Dst, Src, - ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B); + ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD); return Dst; } @@ -915,7 +435,7 @@ struct StrLenOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || - !isa<IntegerType>(FT->getReturnType())) + !FT->getReturnType()->isIntegerTy()) return 0; Value *Src = CI->getOperand(1); @@ -939,8 +459,8 @@ struct StrToOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1))) + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy()) return 0; Value *EndPtr = CI->getOperand(2); @@ -960,9 +480,9 @@ struct StrStrOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<PointerType>(FT->getReturnType())) + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isPointerTy()) return 0; // fold strstr(x, x) -> x. @@ -993,7 +513,7 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). 
if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B), + return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD), CI->getType()); return 0; } @@ -1006,9 +526,9 @@ struct StrStrOpt : public LibCallOptimization { struct MemCmpOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !FT->getReturnType()->isInteger(32)) + if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy(32)) return 0; Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); @@ -1055,13 +575,14 @@ struct MemCpyOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); + EmitMemCpy(CI->getOperand(1), CI->getOperand(2), + CI->getOperand(3), 1, B, TD); return CI->getOperand(1); } }; @@ -1076,13 +597,14 @@ struct MemMoveOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); + EmitMemMove(CI->getOperand(1), CI->getOperand(2), + CI->getOperand(3), 1, B, TD); return CI->getOperand(1); } }; @@ -1097,137 +619,20 @@ struct MemSetOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<IntegerType>(FT->getParamType(1)) || + !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != TD->getIntPtrType(*Context)) return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B, TD); return CI->getOperand(1); } }; //===----------------------------------------------------------------------===// -// Object Size Checking Optimizations -//===----------------------------------------------------------------------===// - -//===---------------------------------------===// -// 'memcpy_chk' Optimizations - -struct MemCpyChkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require TargetData. 
- if (!TD) return 0; - - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) - return 0; - if (SizeCI->isAllOnesValue()) { - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); - return CI->getOperand(1); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'memset_chk' Optimizations - -struct MemSetChkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require TargetData. - if (!TD) return 0; - - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<IntegerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) - return 0; - if (SizeCI->isAllOnesValue()) { - Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), - false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); - return CI->getOperand(1); - } - - return 0; - } -}; - -//===---------------------------------------===// -// 'memmove_chk' Optimizations - -struct MemMoveChkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // These optimizations require TargetData. - if (!TD) return 0; - - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(3)) || - FT->getParamType(2) != TD->getIntPtrType(*Context)) - return 0; - - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4)); - if (!SizeCI) - return 0; - if (SizeCI->isAllOnesValue()) { - EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), - 1, B); - return CI->getOperand(1); - } - - return 0; - } -}; - -struct StrCpyChkOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || - !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1))) - return 0; - - ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3)); - if (!SizeCI) - return 0; - - // If a) we don't have any length information, or b) we know this will - // fit then just lower to a plain strcpy. Otherwise we'll keep our - // strcpy_chk call which may fail at runtime if the size is too long. - // TODO: It might be nice to get a maximum length out of the possible - // string lengths for varying. 
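// For reference, the __*_chk optimizers in this hunk lower the fortified entry
// points as follows; the destination object size is passed as a trailing
// argument, and an all-ones value means "size unknown". A C-level sketch
// (variable names are illustrative):
//
//   __memcpy_chk(dst, src, n, (size_t)-1);   // -> llvm.memcpy(dst, src, n, align 1)
//   __memset_chk(dst, v,  n, (size_t)-1);    // -> llvm.memset(dst, v, n, align 1)
//   __strcpy_chk(dst, "hi", sizeof buf);     // -> strcpy(dst, "hi") when the buffer
//                                            //    is known to be large enough
//
// Calls whose size check cannot be proven redundant are kept, so the runtime
// check can still fail as intended.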
- if (SizeCI->isAllOnesValue() || - SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) - return EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B); - - return 0; - } -}; - - -//===----------------------------------------------------------------------===// // Math Library Optimizations //===----------------------------------------------------------------------===// @@ -1241,7 +646,7 @@ struct PowOpt : public LibCallOptimization { // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPoint()) + !FT->getParamType(0)->isFloatingPointTy()) return 0; Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); @@ -1295,7 +700,7 @@ struct Exp2Opt : public LibCallOptimization { // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isFloatingPoint()) + !FT->getParamType(0)->isFloatingPointTy()) return 0; Value *Op = CI->getOperand(1); @@ -1375,8 +780,8 @@ struct FFSOpt : public LibCallOptimization { // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 1 || - !FT->getReturnType()->isInteger(32) || - !isa<IntegerType>(FT->getParamType(0))) + !FT->getReturnType()->isIntegerTy(32) || + !FT->getParamType(0)->isIntegerTy()) return 0; Value *Op = CI->getOperand(1); @@ -1410,8 +815,8 @@ struct IsDigitOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) - if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - !FT->getParamType(0)->isInteger(32)) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isdigit(c) -> (c-'0') <u 10 @@ -1431,8 +836,8 @@ struct IsAsciiOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(i32) - if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || - !FT->getParamType(0)->isInteger(32)) + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isascii(c) -> c <u 128 @@ -1450,7 +855,7 @@ struct AbsOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { const FunctionType *FT = Callee->getFunctionType(); // We require integer(integer) where the types agree. - if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || + if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || FT->getParamType(0) != FT->getReturnType()) return 0; @@ -1473,7 +878,7 @@ struct ToAsciiOpt : public LibCallOptimization { const FunctionType *FT = Callee->getFunctionType(); // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isInteger(32)) + !FT->getParamType(0)->isIntegerTy(32)) return 0; // isascii(c) -> c & 0x7f @@ -1493,8 +898,8 @@ struct PrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require one fixed pointer argument and an integer/void result. 
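// The character-class simplifications earlier in this hunk rely on these
// branch-free equivalences, spelled out in C (a sketch of the intent, not the
// literal emitted IR):
//
//   isdigit(c)  ->  (unsigned)(c - '0') < 10u   // one subtract + unsigned compare
//   isascii(c)  ->  (unsigned)c < 128u
//   toascii(c)  ->  c & 0x7f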
const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) || - !(isa<IntegerType>(FT->getReturnType()) || + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) return 0; @@ -1512,7 +917,7 @@ struct PrintFOpt : public LibCallOptimization { // in case there is an error writing to stdout. if (FormatStr.size() == 1) { Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), - FormatStr[0]), B); + FormatStr[0]), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1526,7 +931,7 @@ struct PrintFOpt : public LibCallOptimization { Constant *C = ConstantArray::get(*Context, FormatStr, true); C = new GlobalVariable(*Callee->getParent(), C->getType(), true, GlobalVariable::InternalLinkage, C, "str"); - EmitPutS(C, B); + EmitPutS(C, B, TD); return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), FormatStr.size()+1); } @@ -1534,8 +939,8 @@ struct PrintFOpt : public LibCallOptimization { // Optimize specific format strings. // printf("%c", chr) --> putchar(*(i8*)dst) if (FormatStr == "%c" && CI->getNumOperands() > 2 && - isa<IntegerType>(CI->getOperand(2)->getType())) { - Value *Res = EmitPutChar(CI->getOperand(2), B); + CI->getOperand(2)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getOperand(2), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); @@ -1543,9 +948,9 @@ struct PrintFOpt : public LibCallOptimization { // printf("%s\n", str) --> puts(str) if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && - isa<PointerType>(CI->getOperand(2)->getType()) && + CI->getOperand(2)->getType()->isPointerTy() && CI->use_empty()) { - EmitPutS(CI->getOperand(2), B); + EmitPutS(CI->getOperand(2), B, TD); return CI; } return 0; @@ -1559,9 +964,9 @@ struct SPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed pointer arguments and an integer result. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getReturnType())) + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) return 0; // Check for a fixed format string. @@ -1582,8 +987,8 @@ struct SPrintFOpt : public LibCallOptimization { // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. - ConstantInt::get - (TD->getIntPtrType(*Context), FormatStr.size()+1),1,B); + ConstantInt::get(TD->getIntPtrType(*Context), + FormatStr.size()+1), 1, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1595,7 +1000,7 @@ struct SPrintFOpt : public LibCallOptimization { // Decode the second character of the format string. 
if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0; + if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; Value *V = B.CreateTrunc(CI->getOperand(3), Type::getInt8Ty(*Context), "char"); Value *Ptr = CastToCStr(CI->getOperand(1), B); @@ -1612,13 +1017,13 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0; + if (!CI->getOperand(3)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getOperand(3), B); + Value *Len = EmitStrLen(CI->getOperand(3), B, TD); Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B); + EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1634,11 +1039,11 @@ struct FWriteOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require a pointer, an integer, an integer, a pointer, returning integer. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 4 || !isa<PointerType>(FT->getParamType(0)) || - !isa<IntegerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getParamType(2)) || - !isa<PointerType>(FT->getParamType(3)) || - !isa<IntegerType>(FT->getReturnType())) + if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isIntegerTy() || + !FT->getParamType(2)->isIntegerTy() || + !FT->getParamType(3)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) return 0; // Get the element size and count. @@ -1654,7 +1059,7 @@ struct FWriteOpt : public LibCallOptimization { // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); - EmitFPutC(Char, CI->getOperand(4), B); + EmitFPutC(Char, CI->getOperand(4), B, TD); return ConstantInt::get(CI->getType(), 1); } @@ -1672,8 +1077,8 @@ struct FPutsOpt : public LibCallOptimization { // Require two pointers. Also, we can't optimize if return value is used. const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) return 0; @@ -1682,7 +1087,7 @@ struct FPutsOpt : public LibCallOptimization { if (!Len) return 0; EmitFWrite(CI->getOperand(1), ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getOperand(2), B); + CI->getOperand(2), B, TD); return CI; // Known to have no uses (see above). } }; @@ -1694,9 +1099,9 @@ struct FPrintFOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { // Require two fixed paramters as pointers and integer result. 
const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) || - !isa<PointerType>(FT->getParamType(1)) || - !isa<IntegerType>(FT->getReturnType())) + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) return 0; // All the optimizations depend on the format string. @@ -1716,7 +1121,7 @@ struct FPrintFOpt : public LibCallOptimization { EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), - CI->getOperand(1), B); + CI->getOperand(1), B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1728,16 +1133,16 @@ struct FPrintFOpt : public LibCallOptimization { // Decode the second character of the format string. if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> *(i8*)dst = chr - if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0; - EmitFPutC(CI->getOperand(3), CI->getOperand(1), B); + if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; + EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD); return ConstantInt::get(CI->getType(), 1); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) -> fputs(str, F) - if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty()) + if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getOperand(3), CI->getOperand(1), B); + EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD); return CI; } return 0; @@ -1769,10 +1174,6 @@ namespace { SPrintFOpt SPrintF; PrintFOpt PrintF; FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; - // Object Size Checking - MemCpyChkOpt MemCpyChk; MemSetChkOpt MemSetChk; MemMoveChkOpt MemMoveChk; - StrCpyChkOpt StrCpyChk; - bool Modified; // This is only used by doInitialization. 
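// A quick reference for the formatted-I/O rewrites implemented by the
// optimizers above, expressed as source-level C transformations (a sketch; the
// pass matches the equivalent IR calls, and several cases additionally require
// the call's result to be unused):
//
//   printf("x")          -> putchar('x')               // single-character format
//   printf("hello\n")    -> puts("hello")              // constant format, no '%', ends in '\n'
//   printf("%c", c)      -> putchar(c)
//   printf("%s\n", s)    -> puts(s)
//   sprintf(d, "abc")    -> memcpy(d, "abc", 4)        // copies the nul byte
//   sprintf(d, "%s", s)  -> memcpy(d, s, strlen(s)+1)
//   fwrite(p, 1, 1, f)   -> fputc(p[0], f)
//   fputs(s, f)          -> fwrite(s, strlen(s), 1, f) // s of known constant length
//   fprintf(f, "%c", c)  -> fputc(c, f)
//   fprintf(f, "%s", s)  -> fputs(s, f)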
public: static char ID; // Pass identification @@ -1878,12 +1279,6 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["fwrite"] = &FWrite; Optimizations["fputs"] = &FPuts; Optimizations["fprintf"] = &FPrintF; - - // Object Size Checking - Optimizations["__memcpy_chk"] = &MemCpyChk; - Optimizations["__memset_chk"] = &MemSetChk; - Optimizations["__memmove_chk"] = &MemMoveChk; - Optimizations["__strcpy_chk"] = &StrCpyChk; } @@ -2000,7 +1395,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 's': if (Name == "strlen") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); @@ -2018,14 +1413,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "strncpy" || Name == "strtoull") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "strxfrm") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2038,8 +1433,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "strcasecmp" || Name == "strncasecmp") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); @@ -2048,7 +1443,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } else if (Name == "strstr" || Name == "strpbrk") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); @@ -2056,7 +1451,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } else if (Name == "strtok" || Name == "strtok_r") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); @@ -2064,15 +1459,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "setbuf" || Name == "setvbuf") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "strdup" || Name == "strndup") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2082,31 +1477,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "sprintf" || Name == "statvfs") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "snprintf") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); 
setDoesNotCapture(F, 3); } else if (Name == "setitimer") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); setDoesNotCapture(F, 3); } else if (Name == "system") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; // May throw; "system" is a valid pthread cancellation point. setDoesNotCapture(F, 1); @@ -2115,14 +1510,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'm': if (Name == "malloc") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getReturnType())) + !FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); } else if (Name == "memcmp") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setOnlyReadsMemory(F); setDoesNotThrow(F); @@ -2141,18 +1536,18 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "memccpy" || Name == "memmove") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "memalign") { - if (!isa<PointerType>(FTy->getReturnType())) + if (!FTy->getReturnType()->isPointerTy()) continue; setDoesNotAlias(F, 0); } else if (Name == "mkdir" || Name == "mktime") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2161,15 +1556,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'r': if (Name == "realloc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getReturnType())) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); } else if (Name == "read") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; // May throw; "read" is a valid pthread cancellation point. setDoesNotCapture(F, 2); @@ -2178,15 +1573,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "remove" || Name == "realpath") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "rename" || Name == "readlink") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2196,7 +1591,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'w': if (Name == "write") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; // May throw; "write" is a valid pthread cancellation point. 
setDoesNotCapture(F, 2); @@ -2205,16 +1600,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'b': if (Name == "bcopy") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "bcmp") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); @@ -2222,7 +1617,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotCapture(F, 2); } else if (Name == "bzero") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2231,7 +1626,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'c': if (Name == "calloc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType())) + !FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2241,7 +1636,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "clearerr" || Name == "closedir") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2253,14 +1648,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "atof" || Name == "atoll") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); setDoesNotCapture(F, 1); } else if (Name == "access") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2269,9 +1664,9 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'f': if (Name == "fopen") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2279,8 +1674,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotCapture(F, 2); } else if (Name == "fdopen") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2300,13 +1695,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "funlockfile" || Name == "ftrylockfile") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "ferror") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2318,22 +1713,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "frexpl" || Name == "fstatvfs") { if (FTy->getNumParams() 
!= 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "fgets") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 3); } else if (Name == "fread" || Name == "fwrite") { if (FTy->getNumParams() != 4 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(3))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2343,8 +1738,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "fprintf" || Name == "fgetpos") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2356,13 +1751,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "getlogin_r" || Name == "getc_unlocked") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "getenv") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setOnlyReadsMemory(F); @@ -2372,13 +1767,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); } else if (Name == "getitimer") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "getpwnam") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2387,7 +1782,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'u': if (Name == "ungetc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); @@ -2395,15 +1790,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "unlink" || Name == "unsetenv") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "utime" || Name == "utimes") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2413,7 +1808,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'p': if (Name == "putc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); @@ -2421,14 +1816,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "printf" || Name == "perror") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } 
else if (Name == "pread" || Name == "pwrite") { if (FTy->getNumParams() != 4 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; // May throw; these are valid pthread cancellation points. setDoesNotCapture(F, 2); @@ -2436,9 +1831,9 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotThrow(F); } else if (Name == "popen") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2446,7 +1841,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setDoesNotCapture(F, 2); } else if (Name == "pclose") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2455,43 +1850,43 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'v': if (Name == "vscanf") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "vsscanf" || Name == "vfscanf") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "valloc") { - if (!isa<PointerType>(FTy->getReturnType())) + if (!FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); } else if (Name == "vprintf") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "vfprintf" || Name == "vsprintf") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "vsnprintf") { if (FTy->getNumParams() != 4 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(2))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2501,14 +1896,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'o': if (Name == "open") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; // May throw; "open" is a valid pthread cancellation point. 
setDoesNotCapture(F, 1); } else if (Name == "opendir") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2517,13 +1912,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) { break; case 't': if (Name == "tmpfile") { - if (!isa<PointerType>(FTy->getReturnType())) + if (!FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); } else if (Name == "times") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2546,15 +1941,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'l': if (Name == "lstat") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "lchown") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2563,7 +1958,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 'q': if (Name == "qsort") { if (FTy->getNumParams() != 4 || - !isa<PointerType>(FTy->getParamType(3))) + !FTy->getParamType(3)->isPointerTy()) continue; // May throw; places call through function pointer. setDoesNotCapture(F, 4); @@ -2573,27 +1968,27 @@ bool SimplifyLibCalls::doInitialization(Module &M) { if (Name == "__strdup" || Name == "__strndup") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); setDoesNotCapture(F, 1); } else if (Name == "__strtok_r") { if (FTy->getNumParams() != 3 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "_IO_getc") { if (FTy->getNumParams() != 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "_IO_putc") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); @@ -2602,7 +1997,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { case 1: if (Name == "\1__isoc99_scanf") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); @@ -2611,17 +2006,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) { Name == "\1statvfs64" || Name == "\1__isoc99_sscanf") { if (FTy->getNumParams() < 1 || - !isa<PointerType>(FTy->getParamType(0)) || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); setDoesNotCapture(F, 2); } else if (Name == "\1fopen64") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getReturnType()) || - !isa<PointerType>(FTy->getParamType(0)) || - 
!isa<PointerType>(FTy->getParamType(1))) + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); @@ -2630,25 +2025,25 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } else if (Name == "\1fseeko64" || Name == "\1ftello64") { if (FTy->getNumParams() == 0 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 1); } else if (Name == "\1tmpfile64") { - if (!isa<PointerType>(FTy->getReturnType())) + if (!FTy->getReturnType()->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotAlias(F, 0); } else if (Name == "\1fstat64" || Name == "\1fstatvfs64") { if (FTy->getNumParams() != 2 || - !isa<PointerType>(FTy->getParamType(1))) + !FTy->getParamType(1)->isPointerTy()) continue; setDoesNotThrow(F); setDoesNotCapture(F, 2); } else if (Name == "\1open64") { if (FTy->getNumParams() < 2 || - !isa<PointerType>(FTy->getParamType(0))) + !FTy->getParamType(0)->isPointerTy()) continue; // May throw; "open" is a valid pthread cancellation point. setDoesNotCapture(F, 1); diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 8c4aa59..be6b383 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -125,7 +125,7 @@ static bool MightBeFoldableInst(Instruction *I) { // Don't touch identity bitcasts. if (I->getType() == I->getOperand(0)->getType()) return false; - return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType()); + return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. return true; @@ -167,8 +167,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::BitCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). - if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) || - isa<IntegerType>(AddrInst->getOperand(0)->getType())) && + if ((AddrInst->getOperand(0)->getType()->isPointerTy() || + AddrInst->getOperand(0)->getType()->isIntegerTy()) && // Don't touch identity bitcasts. These were probably put here by LSR, // and we don't want to mess around with them. Assume it knows what it // is doing. @@ -569,7 +569,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // Get the access type of this use. If the use isn't a pointer, we don't // know what it accesses. 
Value *Address = User->getOperand(OpNo); - if (!isa<PointerType>(Address->getType())) + if (!Address->getType()->isPointerTy()) return false; const Type *AddressAccessTy = cast<PointerType>(Address->getType())->getElementType(); diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk new file mode 100644 index 0000000..d9f31d7 --- /dev/null +++ b/lib/Transforms/Utils/Android.mk @@ -0,0 +1,49 @@ +LOCAL_PATH:= $(call my-dir) + +transforms_utils_SRC_FILES := \ + AddrModeMatcher.cpp \ + BasicBlockUtils.cpp \ + BasicInliner.cpp \ + BreakCriticalEdges.cpp \ + CloneFunction.cpp \ + CloneLoop.cpp \ + CloneModule.cpp \ + CodeExtractor.cpp \ + DemoteRegToStack.cpp \ + InlineFunction.cpp \ + InstructionNamer.cpp \ + LCSSA.cpp \ + Local.cpp \ + LoopSimplify.cpp \ + LoopUnroll.cpp \ + LowerInvoke.cpp \ + LowerSwitch.cpp \ + Mem2Reg.cpp \ + PromoteMemoryToRegister.cpp \ + SSAUpdater.cpp \ + SSI.cpp \ + SimplifyCFG.cpp \ + UnifyFunctionExitNodes.cpp \ + ValueMapper.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(transforms_utils_SRC_FILES) +LOCAL_MODULE:= libLLVMTransformUtils + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(transforms_utils_SRC_FILES) +LOCAL_MODULE:= libLLVMTransformUtils + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 7bc4fcd..1f62dab 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -274,24 +274,31 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) { ReplaceInstWithInst(TI, NewTI); } -/// SplitEdge - Split the edge connecting specified block. Pass P must -/// not be NULL. -BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { - TerminatorInst *LatchTerm = BB->getTerminator(); - unsigned SuccNum = 0; +/// GetSuccessorNumber - Search for the specified successor of basic block BB +/// and return its position in the terminator instruction's list of +/// successors. It is an error to call this with a block that is not a +/// successor. +unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { + TerminatorInst *Term = BB->getTerminator(); #ifndef NDEBUG - unsigned e = LatchTerm->getNumSuccessors(); + unsigned e = Term->getNumSuccessors(); #endif for (unsigned i = 0; ; ++i) { assert(i != e && "Didn't find edge?"); - if (LatchTerm->getSuccessor(i) == Succ) { - SuccNum = i; - break; - } + if (Term->getSuccessor(i) == Succ) + return i; } + return 0; +} + +/// SplitEdge - Split the edge connecting specified block. Pass P must +/// not be NULL. +BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { + unsigned SuccNum = GetSuccessorNumber(BB, Succ); // If this is a critical edge, let SplitCriticalEdge do it. 
- if (SplitCriticalEdge(BB->getTerminator(), SuccNum, P)) + TerminatorInst *LatchTerm = BB->getTerminator(); + if (SplitCriticalEdge(LatchTerm, SuccNum, P)) return LatchTerm->getSuccessor(SuccNum); // If the edge isn't critical, then BB has a single successor or Succ has a diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 19c7206..3657390 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -179,7 +179,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), TIBB->getName() + "." + DestBB->getName() + "_crit_edge"); - // Create our unconditional branch... + // Create our unconditional branch. BranchInst::Create(DestBB, NewBB); // Branch to the new block, breaking the edge. @@ -192,16 +192,47 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. - // - for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { - PHINode *PN = cast<PHINode>(I); - // We no longer enter through TIBB, now we come in through NewBB. Revector - // exactly one entry in the PHI node that used to come from TIBB to come - // from NewBB. - int BBIdx = PN->getBasicBlockIndex(TIBB); - PN->setIncomingBlock(BBIdx, NewBB); + if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) { + // This conceptually does: + // foreach (PHINode *PN in DestBB) + // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB); + // but is optimized for two cases. + + if (APHI->getNumIncomingValues() <= 8) { // Small # preds case. + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) { + // We no longer enter through TIBB, now we come in through NewBB. + // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast<PHINode>(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != TIBB) + BBIdx = PN->getBasicBlockIndex(TIBB); + PN->setIncomingBlock(BBIdx, NewBB); + } + } else { + // However, the foreach loop is slow for blocks with lots of predecessors + // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk + // the user list of TIBB to find the PHI nodes. + SmallPtrSet<PHINode*, 16> UpdatedPHIs; + + for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end(); + UI != E; ) { + Value::use_iterator Use = UI++; + if (PHINode *PN = dyn_cast<PHINode>(Use)) { + // Remove one entry from each PHI. + if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN)) + PN->setOperand(Use.getOperandNo(), NewBB); + } + } + } } - + // If there are any other edges from TIBB to DestBB, update those to go // through the split block, making those edges non-critical as well (and // reducing the number of phi entries in the DestBB if relevant). @@ -221,6 +252,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If we don't have a pass object, we can't update anything... 
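// The new GetSuccessorNumber helper lets a caller look up an edge's index once
// and reuse it for the critical-edge queries; a minimal usage sketch (names are
// illustrative, and the caller is assumed to be a Pass so analyses can be
// preserved):
//
//   unsigned SuccNum = GetSuccessorNumber(Pred, Succ);
//   if (isCriticalEdge(Pred->getTerminator(), SuccNum))
//     SplitCriticalEdge(Pred->getTerminator(), SuccNum, this);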
if (P == 0) return NewBB; + + DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>(); + DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>(); + LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); + ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); + + // If we have nothing to update, just return. + if (DT == 0 && DF == 0 && LI == 0 && PI == 0) + return NewBB; // Now update analysis information. Since the only predecessor of NewBB is // the TIBB, TIBB clearly dominates NewBB. TIBB usually doesn't dominate @@ -229,14 +269,23 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // loop header) then NewBB dominates DestBB. SmallVector<BasicBlock*, 8> OtherPreds; - for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); I != E; ++I) - if (*I != NewBB) - OtherPreds.push_back(*I); + // If there is a PHI in the block, loop over predecessors with it, which is + // faster than iterating pred_begin/end. + if (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingBlock(i) != NewBB) + OtherPreds.push_back(PN->getIncomingBlock(i)); + } else { + for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); + I != E; ++I) + if (*I != NewBB) + OtherPreds.push_back(*I); + } bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? - if (DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>()) { + if (DT) { DomTreeNode *TINode = DT->getNode(TIBB); // The new block is not the immediate dominator for any other nodes, but @@ -267,7 +316,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Should we update DominanceFrontier information? - if (DominanceFrontier *DF = P->getAnalysisIfAvailable<DominanceFrontier>()) { + if (DF) { // If NewBBDominatesDestBB hasn't been computed yet, do so with DF. if (!OtherPreds.empty()) { // FIXME: IMPLEMENT THIS! @@ -301,7 +350,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Update LoopInfo if it is around. - if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) { + if (LI) { if (Loop *TIL = LI->getLoopFor(TIBB)) { // If one or the other blocks were not in a loop, the new block is not // either, and thus LI doesn't need to be updated. @@ -382,9 +431,8 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // Update ProfileInfo if it is around. - if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) { - PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges); - } + if (PI) + PI->splitEdge(TIBB, DestBB, NewBB, MergeIdenticalEdges); return NewBB; } diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp new file mode 100644 index 0000000..2ea4bb6 --- /dev/null +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -0,0 +1,324 @@ +//===- BuildLibCalls.cpp - Utility builder for libcalls -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements some functions that will create standard C libcalls. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Type.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/LLVMContext.h" +#include "llvm/Intrinsics.h" + +using namespace llvm; + +/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. +Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { + return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr"); +} + +/// EmitStrLen - Emit a call to the strlen function to the builder, for the +/// specified pointer. This always returns an integer value of size intptr_t. +Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + NULL); + CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen"); + if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitStrChr - Emit a call to the strchr function to the builder, for the +/// specified pointer and character. Ptr is required to be some pointer type, +/// and the return value has 'i8*' type. +Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI = + AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + + const Type *I8Ptr = B.getInt8PtrTy(); + const Type *I32Ty = B.getInt32Ty(); + Constant *StrChr = M->getOrInsertFunction("strchr", AttrListPtr::get(&AWI, 1), + I8Ptr, I8Ptr, I32Ty, NULL); + CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B), + ConstantInt::get(I32Ty, C), "strchr"); + if (const Function *F = dyn_cast<Function>(StrChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the +/// specified pointer arguments. +Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + const Type *I8Ptr = B.getInt8PtrTy(); + Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2), + I8Ptr, I8Ptr, I8Ptr, NULL); + CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B), + "strcpy"); + if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always +/// expects that the size has type 'intptr_t' and Dst/Src are pointers. 
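// Usage sketch for EmitMemCpy, mirroring the SimplifyLibCalls callers earlier
// in this patch (note that the TargetData pointer is now threaded through all
// of these helpers):
//
//   // strcpy(dst, <constant of Len bytes>) -> memcpy with align 1:
//   EmitMemCpy(Dst, Src,
//              ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);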
+Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, + unsigned Align, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + const Type *Ty = Len->getType(); + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + return B.CreateCall4(MemCpy, Dst, Src, Len, + ConstantInt::get(B.getInt32Ty(), Align)); +} + +/// EmitMemMove - Emit a call to the memmove function to the builder. This +/// always expects that the size has type 'intptr_t' and Dst/Src are pointers. +Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len, + unsigned Align, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + const Type *Ty = TD->getIntPtrType(Context); + Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1); + Dst = CastToCStr(Dst, B); + Src = CastToCStr(Src, B); + Value *A = ConstantInt::get(B.getInt32Ty(), Align); + return B.CreateCall4(MemMove, Dst, Src, Len, A); +} + +/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is +/// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. +Value *llvm::EmitMemChr(Value *Ptr, Value *Val, + Value *Len, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI; + AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + B.getInt32Ty(), + TD->getIntPtrType(Context), + NULL); + CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); + + if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitMemCmp - Emit a call to the memcmp function. +Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, + Value *Len, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B), + Len, "memcmp"); + + if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitMemSet - Emit a call to the memset function +Value *llvm::EmitMemSet(Value *Dst, Value *Val, + Value *Len, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Intrinsic::ID IID = Intrinsic::memset; + const Type *Tys[1]; + Tys[0] = Len->getType(); + Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); + Value *Align = ConstantInt::get(B.getInt32Ty(), 1); + return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align); +} + +/// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name' (e.g. +/// 'floor'). 
This function is known to take a single of type matching 'Op' and +/// returns one value with the same type. If 'Op' is a long double, 'l' is +/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. +Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name, + IRBuilder<> &B, const AttrListPtr &Attrs) { + char NameBuffer[20]; + if (!Op->getType()->isDoubleTy()) { + // If we need to add a suffix, copy into NameBuffer. + unsigned NameLen = strlen(Name); + assert(NameLen < sizeof(NameBuffer)-2); + memcpy(NameBuffer, Name, NameLen); + if (Op->getType()->isFloatTy()) + NameBuffer[NameLen] = 'f'; // floorf + else + NameBuffer[NameLen] = 'l'; // floorl + NameBuffer[NameLen+1] = 0; + Name = NameBuffer; + } + + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *Callee = M->getOrInsertFunction(Name, Op->getType(), + Op->getType(), NULL); + CallInst *CI = B.CreateCall(Callee, Op, Name); + CI->setAttributes(Attrs); + if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + +/// EmitPutChar - Emit a call to the putchar function. This assumes that Char +/// is an integer. +Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), + B.getInt32Ty(), NULL); + CallInst *CI = B.CreateCall(PutChar, + B.CreateIntCast(Char, + B.getInt32Ty(), + /*isSigned*/true, + "chari"), + "putchar"); + + if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + return CI; +} + +/// EmitPutS - Emit a call to the puts function. This assumes that Str is +/// some pointer. +void llvm::EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + + Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), + B.getInt32Ty(), + B.getInt8PtrTy(), + NULL); + CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts"); + if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + +} + +/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is +/// an integer and File is a pointer to FILE. +void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[2]; + AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), + B.getInt32Ty(), + B.getInt32Ty(), File->getType(), + NULL); + else + F = M->getOrInsertFunction("fputc", + B.getInt32Ty(), + B.getInt32Ty(), + File->getType(), NULL); + Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/true, + "chari"); + CallInst *CI = B.CreateCall2(F, Char, File, "fputc"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +/// EmitFPutS - Emit a call to the puts function. Str is required to be a +/// pointer and File is a pointer to FILE. 
+void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, + const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fputs", B.getInt32Ty(), + B.getInt8PtrTy(), + File->getType(), NULL); + CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} + +/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is +/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE. +void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind); + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Constant *F; + if (File->getType()->isPointerTy()) + F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), + TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + else + F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), + TD->getIntPtrType(Context), + File->getType(), NULL); + CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, + ConstantInt::get(TD->getIntPtrType(Context), 1), File); + + if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) + CI->setCallingConv(Fn->getCallingConv()); +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 93577b4..dec227a 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils BasicBlockUtils.cpp BasicInliner.cpp BreakCriticalEdges.cpp + BuildLibCalls.cpp CloneFunction.cpp CloneLoop.cpp CloneModule.cpp diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 7e7973a..d03f7a6 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -46,7 +46,7 @@ using namespace llvm; static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, uint64_t &ByteOffset, unsigned MaxLookup = 6) { - if (!isa<PointerType>(V->getType())) + if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { @@ -65,7 +65,7 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, } else { return V; } - assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } return V; } @@ -490,6 +490,17 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) { // Splice all the instructions from PredBB to DestBB. 
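As a rough illustration of the BuildLibCalls.cpp helpers added above, a transform that wants to lower a copy into the memcpy intrinsic can call EmitMemCpy through an IRBuilder positioned at the instruction being replaced. This is a minimal sketch rather than code from the change: LowerToMemCpy and its arguments are hypothetical, and the include paths assume the tree layout of this release.

    // Minimal sketch: emit llvm.memcpy via the new helper. Dst/Src may be any
    // pointers (EmitMemCpy bitcasts them to i8*); Len must already carry the
    // type the caller wants for the intrinsic's length operand.
    #include "llvm/Support/IRBuilder.h"
    #include "llvm/Target/TargetData.h"
    #include "llvm/Transforms/Utils/BuildLibCalls.h"
    using namespace llvm;

    static Value *LowerToMemCpy(Value *Dst, Value *Src, Value *Len,
                                IRBuilder<> &B, const TargetData *TD) {
      // Align 1 is the conservative choice when nothing better is known.
      return EmitMemCpy(Dst, Src, Len, /*Align=*/1, B, TD);
    }

EmitUnaryFloatFnCall works the same way for libm routines: passing "floor" with a float operand emits a call to floorf, and a long double operand emits floorl.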
PredBB->getTerminator()->eraseFromParent(); DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList()); + + // Zap anything that took the address of DestBB. Not doing this will give the + // address an invalid value. + if (DestBB->hasAddressTaken()) { + BlockAddress *BA = BlockAddress::get(DestBB); + Constant *Replacement = + ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, + BA->getType())); + BA->destroyConstant(); + } // Anything that branched to PredBB now branches to DestBB. PredBB->replaceAllUsesWith(DestBB); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 57bab60..924b744 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -51,6 +51,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -147,6 +148,11 @@ ReprocessLoop: // Delete each unique out-of-loop (and thus dead) predecessor. for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(), E = BadPreds.end(); I != E; ++I) { + + DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "; + WriteAsOperand(dbgs(), *I, false); + dbgs() << "\n"); + // Inform each successor of each dead pred. for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) (*SI)->removePredecessor(*I); @@ -159,6 +165,27 @@ ReprocessLoop: } } + // If there are exiting blocks with branches on undef, resolve the undef in + // the direction which will exit the loop. This will help simplify loop + // trip count computations. + SmallVector<BasicBlock*, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(), + E = ExitingBlocks.end(); I != E; ++I) + if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator())) + if (BI->isConditional()) { + if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { + + DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in "; + WriteAsOperand(dbgs(), *I, false); + dbgs() << "\n"); + + BI->setCondition(ConstantInt::get(Cond->getType(), + !L->contains(BI->getSuccessor(0)))); + Changed = true; + } + } + // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { @@ -250,8 +277,6 @@ ReprocessLoop: break; } if (UniqueExit) { - SmallVector<BasicBlock*, 8> ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitingBlock = ExitingBlocks[i]; if (!ExitingBlock->getSinglePredecessor()) continue; @@ -282,6 +307,11 @@ ReprocessLoop: // Success. The block is now dead, so remove it from the loop, // update the dominator tree and dominance frontier, and delete it. 
+ + DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "; + WriteAsOperand(dbgs(), ExitingBlock, false); + dbgs() << "\n"); + assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock)); Changed = true; LI->removeBlock(ExitingBlock); @@ -335,6 +365,10 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(), ".preheader", this); + DEBUG(dbgs() << "LoopSimplify: Creating pre-header "; + WriteAsOperand(dbgs(), NewBB, false); + dbgs() << "\n"); + // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); @@ -360,6 +394,10 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { LoopBlocks.size(), ".loopexit", this); + DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "; + WriteAsOperand(dbgs(), NewBB, false); + dbgs() << "\n"); + return NewBB; } @@ -480,6 +518,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } + DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], OuterLoopPreds.size(), @@ -574,6 +614,10 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { Header->getName()+".backedge", F); BranchInst *BETerminator = BranchInst::Create(Header, BEBlock); + DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "; + WriteAsOperand(dbgs(), BEBlock, false); + dbgs() << "\n"); + // Move the new backedge block to right after the last backedge block. Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 544e20b..4f5a70b 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -518,7 +518,7 @@ void PromoteMem2Reg::run() { // If this PHI node merges one value and/or undefs, get the value. if (Value *V = PN->hasConstantValue(&DT)) { - if (AST && isa<PointerType>(PN->getType())) + if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -780,7 +780,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, if (ReplVal == LI) ReplVal = UndefValue::get(LI->getType()); LI->replaceAllUsesWith(ReplVal); - if (AST && isa<PointerType>(LI->getType())) + if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); @@ -838,7 +838,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { LI->replaceAllUsesWith(UndefValue::get(LI->getType())); - if (AST && isa<PointerType>(LI->getType())) + if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LBI.deleteValue(LI); LI->eraseFromParent(); @@ -874,7 +874,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, // Otherwise, there was a store before this load, the load takes its value. 
--I; LI->replaceAllUsesWith(I->second->getOperand(0)); - if (AST && isa<PointerType>(LI->getType())) + if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); LI->eraseFromParent(); LBI.deleteValue(LI); @@ -922,7 +922,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, InsertedPHINodes.insert(PN); - if (AST && isa<PointerType>(PN->getType())) + if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); return true; @@ -996,7 +996,7 @@ NextIteration: // Anything using the load now uses the current value. LI->replaceAllUsesWith(V); - if (AST && isa<PointerType>(LI->getType())) + if (AST && LI->getType()->isPointerTy()) AST->deleteValue(LI); BB->getInstList().erase(LI); } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 795b6bf..f343c38 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -271,7 +271,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) { // Normal constant int. ConstantInt *CI = dyn_cast<ConstantInt>(V); - if (CI || !TD || !isa<Constant>(V) || !isa<PointerType>(V->getType())) + if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy()) return CI; // This is some kind of pointer constant. Turn it into a pointer-sized @@ -701,7 +701,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) { AddPredecessorToBlock(NewSuccessors[i], Pred, BB); // Convert pointer to int before we switch. - if (isa<PointerType>(CV->getType())) { + if (CV->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without TargetData"); CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()), "magicptr", PTI); @@ -915,7 +915,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { case Instruction::Add: case Instruction::Sub: // Not worth doing for vector ops. - if (isa<VectorType>(HInst->getType())) + if (HInst->getType()->isVectorTy()) return false; break; case Instruction::And: @@ -925,7 +925,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { case Instruction::LShr: case Instruction::AShr: // Don't mess with vector operations. - if (isa<VectorType>(HInst->getType())) + if (HInst->getType()->isVectorTy()) return false; break; // These are all cheap and non-trapping instructions. } @@ -1077,7 +1077,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB; if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && - CB->getType()->isInteger(1)) { + CB->getType()->isIntegerTy(1)) { // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -2068,7 +2068,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); // Convert pointer to int before we switch. 
- if (isa<PointerType>(CompVal->getType())) { + if (CompVal->getType()->isPointerTy()) { assert(TD && "Cannot switch on pointer without TargetData"); CompVal = new PtrToIntInst(CompVal, TD->getIntPtrType(CompVal->getContext()), diff --git a/lib/VMCore/Android.mk b/lib/VMCore/Android.mk new file mode 100644 index 0000000..4784684 --- /dev/null +++ b/lib/VMCore/Android.mk @@ -0,0 +1,61 @@ +LOCAL_PATH:= $(call my-dir) + +vmcore_SRC_FILES := \ + AsmWriter.cpp \ + Attributes.cpp \ + AutoUpgrade.cpp \ + BasicBlock.cpp \ + ConstantFold.cpp \ + Constants.cpp \ + Core.cpp \ + Dominators.cpp \ + Function.cpp \ + GVMaterializer.cpp \ + Globals.cpp \ + IRBuilder.cpp \ + InlineAsm.cpp \ + Instruction.cpp \ + Instructions.cpp \ + IntrinsicInst.cpp \ + LLVMContext.cpp \ + LeakDetector.cpp \ + Metadata.cpp \ + Module.cpp \ + Pass.cpp \ + PassManager.cpp \ + PrintModulePass.cpp \ + Type.cpp \ + TypeSymbolTable.cpp \ + Use.cpp \ + Value.cpp \ + ValueSymbolTable.cpp \ + ValueTypes.cpp \ + Verifier.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(vmcore_SRC_FILES) + +LOCAL_MODULE:= libLLVMCore + +include $(LLVM_HOST_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) + +REQUIRES_RTTI := 1 + +LOCAL_SRC_FILES := $(vmcore_SRC_FILES) + +LOCAL_MODULE:= libLLVMCore + +include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_GEN_INTRINSICS_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index ab5f45a..fd74241 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -17,6 +17,7 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Assembly/AsmAnnotationWriter.h" +#include "llvm/LLVMContext.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -239,6 +240,19 @@ void TypePrinting::CalcTypeName(const Type *Ty, OS << '>'; break; } + case Type::UnionTyID: { + const UnionType *UTy = cast<UnionType>(Ty); + OS << "union { "; + for (StructType::element_iterator I = UTy->element_begin(), + E = UTy->element_end(); I != E; ++I) { + CalcTypeName(*I, TypeStack, OS); + if (next(I) != UTy->element_end()) + OS << ','; + OS << ' '; + } + OS << '}'; + break; + } case Type::PointerTyID: { const PointerType *PTy = cast<PointerType>(Ty); CalcTypeName(PTy->getElementType(), TypeStack, OS); @@ -363,8 +377,8 @@ namespace { return; // If this is a structure or opaque type, add a name for the type. - if (((isa<StructType>(Ty) && cast<StructType>(Ty)->getNumElements()) - || isa<OpaqueType>(Ty)) && !TP.hasTypeName(Ty)) { + if (((Ty->isStructTy() && cast<StructType>(Ty)->getNumElements()) + || Ty->isOpaqueTy()) && !TP.hasTypeName(Ty)) { TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size()))); NumberedTypes.push_back(Ty); } @@ -418,13 +432,13 @@ static void AddModuleTypesToPrinter(TypePrinting &TP, // they are used too often to have a single useful name. if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) { const Type *PETy = PTy->getElementType(); - if ((PETy->isPrimitiveType() || PETy->isInteger()) && - !isa<OpaqueType>(PETy)) + if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) && + !PETy->isOpaqueTy()) continue; } // Likewise don't insert primitives either. 
- if (Ty->isInteger() || Ty->isPrimitiveType()) + if (Ty->isIntegerTy() || Ty->isPrimitiveType()) continue; // Get the name as a string and insert it into TypeNames. @@ -836,7 +850,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) { static void WriteConstantInt(raw_ostream &Out, const Constant *CV, TypePrinting &TypePrinter, SlotTracker *Machine) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - if (CI->getType()->isInteger(1)) { + if (CI->getType()->isIntegerTy(1)) { Out << (CI->getZExtValue() ? "true" : "false"); return; } @@ -1223,7 +1237,6 @@ class AssemblyWriter { TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; std::vector<const Type*> NumberedTypes; - SmallVector<StringRef, 8> MDNames; public: inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, @@ -1231,8 +1244,6 @@ public: AssemblyAnnotationWriter *AAW) : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M); - if (M) - M->getMDKindNames(MDNames); } void printMDNodeBody(const MDNode *MD); @@ -1252,15 +1263,14 @@ public: void printArgument(const Argument *FA, Attributes Attrs); void printBasicBlock(const BasicBlock *BB); void printInstruction(const Instruction &I); -private: +private: // printInfoComment - Print a little comment after the instruction indicating // which slot it occupies. void printInfoComment(const Value &V); }; } // end of anonymous namespace - void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { if (Operand == 0) { Out << "<null operand!>"; @@ -1689,11 +1699,15 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out); } - /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. /// void AssemblyWriter::printInfoComment(const Value &V) { + if (AnnotationWriter) { + AnnotationWriter->printInfoComment(V, Out); + return; + } + if (V.getType()->isVoidTy()) return; Out.PadToColumn(50); @@ -1834,8 +1848,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Out << ' '; if (!FTy->isVarArg() && - (!isa<PointerType>(RetTy) || - !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) { + (!RetTy->isPointerTy() || + !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) { TypePrinter.print(RetTy, Out); Out << ' '; writeOperand(Operand, false); @@ -1880,8 +1894,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Out << ' '; if (!FTy->isVarArg() && - (!isa<PointerType>(RetTy) || - !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) { + (!RetTy->isPointerTy() || + !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) { TypePrinter.print(RetTy, Out); Out << ' '; writeOperand(Operand, false); @@ -1972,12 +1986,20 @@ void AssemblyWriter::printInstruction(const Instruction &I) { } // Print Metadata info. - if (!MDNames.empty()) { - SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD; - I.getAllMetadata(InstMD); - for (unsigned i = 0, e = InstMD.size(); i != e; ++i) - Out << ", !" << MDNames[InstMD[i].first] - << " !" 
<< Machine.getMetadataSlot(InstMD[i].second); + SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD; + I.getAllMetadata(InstMD); + if (!InstMD.empty()) { + SmallVector<StringRef, 8> MDNames; + I.getType()->getContext().getMDKindNames(MDNames); + for (unsigned i = 0, e = InstMD.size(); i != e; ++i) { + unsigned Kind = InstMD[i].first; + if (Kind < MDNames.size()) { + Out << ", !" << MDNames[Kind]; + } else { + Out << ", !<unknown kind #" << Kind << ">"; + } + Out << " !" << Machine.getMetadataSlot(InstMD[i].second); + } } printInfoComment(I); } diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index a371c6f..a000aee 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -70,6 +70,11 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "noimplicitfloat "; if (Attrs & Attribute::Naked) Result += "naked "; + if (Attrs & Attribute::StackAlignment) { + Result += "alignstack("; + Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs)); + Result += ") "; + } if (Attrs & Attribute::Alignment) { Result += "align "; Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); @@ -84,11 +89,11 @@ std::string Attribute::getAsString(Attributes Attrs) { Attributes Attribute::typeIncompatible(const Type *Ty) { Attributes Incompatible = None; - if (!Ty->isInteger()) + if (!Ty->isIntegerTy()) // Attributes that only apply to integers. Incompatible |= SExt | ZExt; - if (!isa<PointerType>(Ty)) + if (!Ty->isPointerTy()) // Attributes that only apply to pointers. Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture; diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 5c117d8..549977c 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -112,7 +112,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) { IdxList.push_back(Zero); } else if (const SequentialType *STy = dyn_cast<SequentialType>(ElTy)) { - if (isa<PointerType>(ElTy)) break; // Can't index into pointers! + if (ElTy->isPointerTy()) break; // Can't index into pointers! ElTy = STy->getElementType(); IdxList.push_back(Zero); } else { @@ -155,12 +155,12 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) { // Handle integral constant input. if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - if (DestTy->isInteger()) + if (DestTy->isIntegerTy()) // Integral -> Integral. This is a no-op because the bit widths must // be the same. Consequently, we just fold to V. 
return V; - if (DestTy->isFloatingPoint()) + if (DestTy->isFloatingPointTy()) return ConstantFP::get(DestTy->getContext(), APFloat(CI->getValue(), !DestTy->isPPC_FP128Ty())); @@ -189,7 +189,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) { /// static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, unsigned ByteSize) { - assert(isa<IntegerType>(C->getType()) && + assert(C->getType()->isIntegerTy() && (cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 && "Non-byte sized integer input"); unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8; @@ -334,11 +334,7 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); return ConstantExpr::getNUWMul(E, N); } - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - Constant *N = ConstantInt::get(DestTy, VTy->getNumElements()); - Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true); - return ConstantExpr::getNUWMul(E, N); - } + if (const StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked()) { unsigned NumElems = STy->getNumElements(); @@ -361,10 +357,26 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy, } } + if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { + unsigned NumElems = UTy->getNumElements(); + // Check for a union with all members having the same size. + Constant *MemberSize = + getFoldedSizeOf(UTy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberSize != + getFoldedSizeOf(UTy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) + return MemberSize; + } + // Pointer size doesn't depend on the pointee type, so canonicalize them // to an arbitrary pointee. if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) - if (!PTy->getElementType()->isInteger(1)) + if (!PTy->getElementType()->isIntegerTy(1)) return getFoldedSizeOf(PointerType::get(IntegerType::get(PTy->getContext(), 1), PTy->getAddressSpace()), @@ -426,10 +438,28 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy, return MemberAlign; } + if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { + // Union alignment is the maximum alignment of any member. + // Without target data, we can't compare much, but we can check to see + // if all the members have the same alignment. + unsigned NumElems = UTy->getNumElements(); + // Check for a union with all members having the same alignment. + Constant *MemberAlign = + getFoldedAlignOf(UTy->getElementType(0), DestTy, true); + bool AllSame = true; + for (unsigned i = 1; i != NumElems; ++i) + if (MemberAlign != getFoldedAlignOf(UTy->getElementType(i), DestTy, true)) { + AllSame = false; + break; + } + if (AllSame) + return MemberAlign; + } + // Pointer alignment doesn't depend on the pointee type, so canonicalize them // to an arbitrary pointee. 
if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) - if (!PTy->getElementType()->isInteger(1)) + if (!PTy->getElementType()->isIntegerTy(1)) return getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(), 1), @@ -464,13 +494,7 @@ static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo, Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true); return ConstantExpr::getNUWMul(E, N); } - if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { - Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false, - DestTy, false), - FieldNo, DestTy); - Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true); - return ConstantExpr::getNUWMul(E, N); - } + if (const StructType *STy = dyn_cast<StructType>(Ty)) if (!STy->isPacked()) { unsigned NumElems = STy->getNumElements(); @@ -551,7 +575,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, // operating on each element. In the cast of bitcasts, the element // count may be mismatched; don't attempt to handle that here. if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) - if (isa<VectorType>(DestTy) && + if (DestTy->isVectorTy() && cast<VectorType>(DestTy)->getNumElements() == CV->getType()->getNumElements()) { std::vector<Constant*> res; @@ -629,12 +653,12 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2)); if (CI->isOne() && STy->getNumElements() == 2 && - STy->getElementType(0)->isInteger(1)) { + STy->getElementType(0)->isIntegerTy(1)) { return getFoldedAlignOf(STy->getElementType(1), DestTy, false); } } // Handle an offsetof-like expression. - if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)){ + if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()){ if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2), DestTy, false)) return C; @@ -885,6 +909,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, unsigned numOps; if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy)) numOps = AR->getNumElements(); + else if (AggTy->isUnionTy()) + numOps = 1; else numOps = cast<StructType>(AggTy)->getNumElements(); @@ -901,6 +927,10 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, if (const StructType* ST = dyn_cast<StructType>(AggTy)) return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked()); + if (const UnionType* UT = dyn_cast<UnionType>(AggTy)) { + assert(Ops.size() == 1 && "Union can only contain a single value!"); + return ConstantUnion::get(UT, Ops[0]); + } return ConstantArray::get(cast<ArrayType>(AggTy), Ops); } @@ -1099,6 +1129,10 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, return ConstantExpr::getLShr(C1, C2); break; } + } else if (isa<ConstantInt>(C1)) { + // If C1 is a ConstantInt and C2 is not, swap the operands. + if (Instruction::isCommutative(Opcode)) + return ConstantExpr::get(Opcode, C2, C1); } // At this point we know neither constant is an UndefValue. @@ -1358,35 +1392,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } else if (isa<ConstantExpr>(C2)) { // If C2 is a constant expr and C1 isn't, flop them around and fold the // other way if possible. - switch (Opcode) { - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - // No change of opcode required. 
+ if (Instruction::isCommutative(Opcode)) return ConstantFoldBinaryInstruction(Opcode, C2, C1); - - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::SDiv: - case Instruction::UDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - default: // These instructions cannot be flopped around. - break; - } } // i1 can be simplified in many cases. - if (C1->getType()->isInteger(1)) { + if (C1->getType()->isIntegerTy(1)) { switch (Opcode) { case Instruction::Add: case Instruction::Sub: @@ -1421,7 +1432,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, /// isZeroSizedType - This type is zero sized if its an array or structure of /// zero sized types. The only leaf zero sized type is an empty structure. static bool isMaybeZeroSizedType(const Type *Ty) { - if (isa<OpaqueType>(Ty)) return true; // Can't say. + if (Ty->isOpaqueTy()) return true; // Can't say. if (const StructType *STy = dyn_cast<StructType>(Ty)) { // If all of elements have zero size, this does too. @@ -1452,10 +1463,10 @@ static int IdxCompare(Constant *C1, Constant *C2, const Type *ElTy) { // Ok, we have two differing integer indices. Sign extend them to be the same // type. Long is always big enough, so we use it. - if (!C1->getType()->isInteger(64)) + if (!C1->getType()->isIntegerTy(64)) C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext())); - if (!C2->getType()->isInteger(64)) + if (!C2->getType()->isIntegerTy(64)) C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext())); if (C1 == C2) return 0; // They are equal @@ -1661,7 +1672,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, // If the cast is not actually changing bits, and the second operand is a // null pointer, do the comparison with the pre-casted value. if (V2->isNullValue() && - (isa<PointerType>(CE1->getType()) || CE1->getType()->isInteger())) { + (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) { if (CE1->getOpcode() == Instruction::ZExt) isSigned = false; if (CE1->getOpcode() == Instruction::SExt) isSigned = true; return evaluateICmpRelation(CE1Op0, @@ -1807,7 +1818,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // Handle some degenerate cases first if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) - return UndefValue::get(ResultTy); + return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred)); // No compile-time operations on this type yet. if (C1->getType()->isPPC_FP128Ty()) @@ -1836,7 +1847,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, } // If the comparison is a comparison between two i1's, simplify it. 
- if (C1->getType()->isInteger(1)) { + if (C1->getType()->isIntegerTy(1)) { switch(pred) { case ICmpInst::ICMP_EQ: if (isa<ConstantInt>(C2)) @@ -1908,7 +1919,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan || R==APFloat::cmpEqual); } - } else if (isa<VectorType>(C1->getType())) { + } else if (C1->getType()->isVectorTy()) { SmallVector<Constant*, 16> C1Elts, C2Elts; C1->getVectorElements(C1Elts); C2->getVectorElements(C2Elts); @@ -1925,7 +1936,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, return ConstantVector::get(&ResElts[0], ResElts.size()); } - if (C1->getType()->isFloatingPoint()) { + if (C1->getType()->isFloatingPointTy()) { int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. switch (evaluateFCmpRelation(C1, C2)) { default: llvm_unreachable("Unknown relation!"); @@ -2059,7 +2070,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) { Constant *CE2Op0 = CE2->getOperand(0); if (CE2->getOpcode() == Instruction::BitCast && - isa<VectorType>(CE2->getType())==isa<VectorType>(CE2Op0->getType())) { + CE2->getType()->isVectorTy() == CE2Op0->getType()->isVectorTy()) { Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType()); return ConstantExpr::getICmp(pred, Inverse, CE2Op0); } @@ -2067,8 +2078,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If the left hand side is an extension, try eliminating it. if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { - if (CE1->getOpcode() == Instruction::SExt || - CE1->getOpcode() == Instruction::ZExt) { + if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) || + (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){ Constant *CE1Op0 = CE1->getOperand(0); Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType()); if (CE1Inverse == CE1Op0) { @@ -2086,27 +2097,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If C2 is a constant expr and C1 isn't, flip them around and fold the // other way if possible. // Also, if C1 is null and C2 isn't, flip them around. - switch (pred) { - case ICmpInst::ICMP_EQ: - case ICmpInst::ICMP_NE: - // No change of predicate required. - return ConstantExpr::getICmp(pred, C2, C1); - - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_ULE: - case ICmpInst::ICMP_SLE: - case ICmpInst::ICMP_UGE: - case ICmpInst::ICMP_SGE: - // Change the predicate as necessary to swap the operands. - pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred); - return ConstantExpr::getICmp(pred, C2, C1); - - default: // These predicates cannot be flopped around. - break; - } + pred = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)pred); + return ConstantExpr::getICmp(pred, C2, C1); } } return 0; @@ -2178,7 +2170,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C, I != E; ++I) LastTy = *I; - if ((LastTy && isa<ArrayType>(LastTy)) || Idx0->isNullValue()) { + if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) { SmallVector<Value*, 16> NewIndices; NewIndices.reserve(NumIdx + CE->getNumOperands()); for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i) @@ -2260,10 +2252,10 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C, // Before adding, extend both operands to i64 to avoid // overflow trouble. 
- if (!PrevIdx->getType()->isInteger(64)) + if (!PrevIdx->getType()->isIntegerTy(64)) PrevIdx = ConstantExpr::getSExt(PrevIdx, Type::getInt64Ty(Div->getContext())); - if (!Div->getType()->isInteger(64)) + if (!Div->getType()->isIntegerTy(64)) Div = ConstantExpr::getSExt(Div, Type::getInt64Ty(Div->getContext())); diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 2250626..10f8879 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -229,7 +229,7 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { /// This handles breaking down a vector undef into undef elements, etc. For /// constant exprs and other cases we can't handle, we return an empty vector. void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const { - assert(isa<VectorType>(getType()) && "Not a vector constant!"); + assert(getType()->isVectorTy() && "Not a vector constant!"); if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) { for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) @@ -404,13 +404,13 @@ ConstantFP* ConstantFP::getNegativeZero(const Type* Ty) { Constant* ConstantFP::getZeroValueForNegation(const Type* Ty) { if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) - if (PTy->getElementType()->isFloatingPoint()) { + if (PTy->getElementType()->isFloatingPointTy()) { std::vector<Constant*> zeros(PTy->getNumElements(), getNegativeZero(PTy->getElementType())); return ConstantVector::get(PTy, zeros); } - if (Ty->isFloatingPoint()) + if (Ty->isFloatingPointTy()) return getNegativeZero(Ty); return Constant::getNullValue(Ty); @@ -585,6 +585,27 @@ Constant* ConstantStruct::get(LLVMContext &Context, return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed); } +ConstantUnion::ConstantUnion(const UnionType *T, Constant* V) + : Constant(T, ConstantUnionVal, + OperandTraits<ConstantUnion>::op_end(this) - 1, 1) { + Use *OL = OperandList; + assert(T->getElementTypeIndex(V->getType()) >= 0 && + "Initializer for union element isn't a member of union type!"); + *OL = V; +} + +// ConstantUnion accessors. +Constant* ConstantUnion::get(const UnionType* T, Constant* V) { + LLVMContextImpl* pImpl = T->getContext().pImpl; + + // Create a ConstantAggregateZero value if all elements are zeros... 
+ if (!V->isNullValue()) + return pImpl->UnionConstants.getOrCreate(T, V); + + return ConstantAggregateZero::get(T); +} + + ConstantVector::ConstantVector(const VectorType *T, const std::vector<Constant*> &V) : Constant(T, ConstantVectorVal, @@ -640,13 +661,13 @@ Constant* ConstantVector::get(Constant* const* Vals, unsigned NumVals) { } Constant* ConstantExpr::getNSWNeg(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return getNSWSub(ConstantFP::getZeroValueForNegation(C->getType()), C); } Constant* ConstantExpr::getNUWNeg(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return getNUWSub(ConstantFP::getZeroValueForNegation(C->getType()), C); } @@ -923,7 +944,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) { // Factory Function Implementation ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) { - assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) && + assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) && "Cannot create an aggregate zero of non-aggregate type!"); LLVMContextImpl *pImpl = Ty->getContext().pImpl; @@ -948,7 +969,7 @@ void ConstantArray::destroyConstant() { /// if the elements of the array are all ConstantInt's. bool ConstantArray::isString() const { // Check the element type for i8... - if (!getType()->getElementType()->isInteger(8)) + if (!getType()->getElementType()->isIntegerTy(8)) return false; // Check the elements to make sure they are all integers, not constant // expressions. @@ -963,7 +984,7 @@ bool ConstantArray::isString() const { /// null bytes except its terminator. bool ConstantArray::isCString() const { // Check the element type for i8... - if (!getType()->getElementType()->isInteger(8)) + if (!getType()->getElementType()->isIntegerTy(8)) return false; // Last element must be a null. @@ -1010,6 +1031,13 @@ void ConstantStruct::destroyConstant() { // destroyConstant - Remove the constant from the constant table... // +void ConstantUnion::destroyConstant() { + getType()->getContext().pImpl->UnionConstants.remove(this); + destroyConstantImpl(); +} + +// destroyConstant - Remove the constant from the constant table... 
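The ConstantUnion pieces above pair with the new UnionType support. A sketch of building a union type and a constant of that type follows; MakeIntOrFloatUnion and the element choice are illustrative only, and the UnionType declaration is assumed to live in DerivedTypes.h alongside the other derived types.

    // Sketch: build 'union { i32, float }' and a constant holding an i32.
    // As the code above shows, ConstantUnion::get folds an all-zero
    // initializer to ConstantAggregateZero.
    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    static Constant *MakeIntOrFloatUnion(LLVMContext &Ctx) {
      const Type *Elts[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx) };
      const UnionType *UTy = UnionType::get(&Elts[0], 2);
      Constant *Val = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
      return ConstantUnion::get(UTy, Val);
    }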
+// void ConstantVector::destroyConstant() { getType()->getContext().pImpl->VectorConstants.remove(this); destroyConstantImpl(); @@ -1211,18 +1239,18 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) { } Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) { - assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && "Invalid cast"); + assert(S->getType()->isPointerTy() && "Invalid cast"); + assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return getCast(Instruction::PtrToInt, S, Ty); return getCast(Instruction::BitCast, S, Ty); } Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, bool isSigned) { - assert(C->getType()->isIntOrIntVector() && - Ty->isIntOrIntVector() && "Invalid cast"); + assert(C->getType()->isIntOrIntVectorTy() && + Ty->isIntOrIntVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); Instruction::CastOps opcode = @@ -1233,7 +1261,7 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty, } Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -1250,8 +1278,8 @@ Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer"); - assert(Ty->isIntOrIntVector() && "Trunc produces only integral"); + assert(C->getType()->isIntOrIntVectorTy() && "Trunc operand must be integer"); + assert(Ty->isIntOrIntVectorTy() && "Trunc produces only integral"); assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "SrcTy must be larger than DestTy for Trunc!"); @@ -1264,8 +1292,8 @@ Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral"); - assert(Ty->isIntOrIntVector() && "SExt produces only integer"); + assert(C->getType()->isIntOrIntVectorTy() && "SExt operand must be integral"); + assert(Ty->isIntOrIntVectorTy() && "SExt produces only integer"); assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for SExt!"); @@ -1278,8 +1306,8 @@ Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && "ZEXt operand must be integral"); - assert(Ty->isIntOrIntVector() && "ZExt produces only integer"); + assert(C->getType()->isIntOrIntVectorTy() && "ZEXt operand must be integral"); + assert(Ty->isIntOrIntVectorTy() && "ZExt produces only integer"); assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "SrcTy must be smaller than DestTy for ZExt!"); @@ -1292,7 +1320,7 @@ Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; 
#endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&& "This is an illegal floating point truncation!"); return getFoldedCast(Instruction::FPTrunc, C, Ty); @@ -1304,7 +1332,7 @@ Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&& "This is an illegal floating point extension!"); return getFoldedCast(Instruction::FPExt, C, Ty); @@ -1316,7 +1344,7 @@ Constant *ConstantExpr::getUIToFP(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() && "This is an illegal uint to floating point cast!"); return getFoldedCast(Instruction::UIToFP, C, Ty); } @@ -1327,7 +1355,7 @@ Constant *ConstantExpr::getSIToFP(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isIntOrIntVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isFPOrFPVectorTy() && "This is an illegal sint to floating point cast!"); return getFoldedCast(Instruction::SIToFP, C, Ty); } @@ -1338,7 +1366,7 @@ Constant *ConstantExpr::getFPToUI(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() && "This is an illegal floating point to uint cast!"); return getFoldedCast(Instruction::FPToUI, C, Ty); } @@ -1349,20 +1377,20 @@ Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) { bool toVec = Ty->getTypeID() == Type::VectorTyID; #endif assert((fromVec == toVec) && "Cannot convert from scalar to/from vector"); - assert(C->getType()->isFPOrFPVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isIntOrIntVectorTy() && "This is an illegal floating point to sint cast!"); return getFoldedCast(Instruction::FPToSI, C, Ty); } Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) { - assert(isa<PointerType>(C->getType()) && "PtrToInt source must be pointer"); - assert(DstTy->isInteger() && "PtrToInt destination must be integral"); + assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer"); + assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral"); return getFoldedCast(Instruction::PtrToInt, C, DstTy); } Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) { - assert(C->getType()->isInteger() && "IntToPtr source must be integral"); - assert(isa<PointerType>(DstTy) && "IntToPtr destination must be a pointer"); + assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral"); + assert(DstTy->isPointerTy() && "IntToPtr 
destination must be a pointer"); return getFoldedCast(Instruction::IntToPtr, C, DstTy); } @@ -1421,7 +1449,7 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate, Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) { // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (C1->getType()->isFPOrFPVector()) { + if (C1->getType()->isFPOrFPVectorTy()) { if (Opcode == Instruction::Add) Opcode = Instruction::FAdd; else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub; else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul; @@ -1432,51 +1460,51 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, case Instruction::Sub: case Instruction::Mul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!"); break; case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create a floating-point operation on a " "non-floating-point type!"); break; case Instruction::UDiv: case Instruction::SDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FDiv: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::URem: case Instruction::SRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::FRem: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isFPOrFPVector() && + assert(C1->getType()->isFPOrFPVectorTy() && "Tried to create an arithmetic operation on a non-arithmetic type!"); break; case Instruction::And: case Instruction::Or: case Instruction::Xor: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create a logical operation on a non-integral type!"); break; case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert(C1->getType()->isIntOrIntVector() && + assert(C1->getType()->isIntOrIntVectorTy() && "Tried to create a shift operation on a non-integer type!"); break; default: @@ -1564,7 +1592,7 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C, (Constant**)Idxs, NumIdx)) return FC; // Fold a few common cases... 
- assert(isa<PointerType>(C->getType()) && + assert(C->getType()->isPointerTy() && "Non-pointer type for constant GetElementPtr expression"); // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec; @@ -1591,7 +1619,7 @@ Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy, (Constant**)Idxs, NumIdx)) return FC; // Fold a few common cases... - assert(isa<PointerType>(C->getType()) && + assert(C->getType()->isPointerTy() && "Non-pointer type for constant GetElementPtr expression"); // Look up the constant in the table first to ensure uniqueness std::vector<Constant*> ArgVec; @@ -1699,9 +1727,9 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val, } Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { - assert(isa<VectorType>(Val->getType()) && + assert(Val->getType()->isVectorTy() && "Tried to create extractelement operation on non-vector type!"); - assert(Idx->getType()->isInteger(32) && + assert(Idx->getType()->isIntegerTy(32) && "Extractelement index must be i32 type!"); return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(), Val, Idx); @@ -1723,11 +1751,11 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val, Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, Constant *Idx) { - assert(isa<VectorType>(Val->getType()) && + assert(Val->getType()->isVectorTy() && "Tried to create insertelement operation on non-vector type!"); assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType() && "Insertelement types must match!"); - assert(Idx->getType()->isInteger(32) && + assert(Idx->getType()->isIntegerTy(32) && "Insertelement index must be i32 type!"); return getInsertElementTy(Val->getType(), Val, Elt, Idx); } @@ -1811,9 +1839,9 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, Constant* ConstantExpr::getNeg(Constant* C) { // API compatibility: Adjust integer opcodes to floating-point opcodes. 
- if (C->getType()->isFPOrFPVector()) + if (C->getType()->isFPOrFPVectorTy()) return getFNeg(C); - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NEG a nonintegral value!"); return get(Instruction::Sub, ConstantFP::getZeroValueForNegation(C->getType()), @@ -1821,7 +1849,7 @@ Constant* ConstantExpr::getNeg(Constant* C) { } Constant* ConstantExpr::getFNeg(Constant* C) { - assert(C->getType()->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && "Cannot FNEG a non-floating-point value!"); return get(Instruction::FSub, ConstantFP::getZeroValueForNegation(C->getType()), @@ -1829,7 +1857,7 @@ Constant* ConstantExpr::getFNeg(Constant* C) { } Constant* ConstantExpr::getNot(Constant* C) { - assert(C->getType()->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && "Cannot NOT a nonintegral value!"); return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType())); } @@ -2083,6 +2111,56 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, destroyConstant(); } +void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To, + Use *U) { + assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); + Constant *ToC = cast<Constant>(To); + + assert(U == OperandList && "Union constants can only have one use!"); + assert(getNumOperands() == 1 && "Union constants can only have one use!"); + assert(getOperand(0) == From && "ReplaceAllUsesWith broken!"); + + std::pair<LLVMContextImpl::UnionConstantsTy::MapKey, ConstantUnion*> Lookup; + Lookup.first.first = getType(); + Lookup.second = this; + Lookup.first.second = ToC; + + LLVMContext &Context = getType()->getContext(); + LLVMContextImpl *pImpl = Context.pImpl; + + Constant *Replacement = 0; + if (ToC->isNullValue()) { + Replacement = ConstantAggregateZero::get(getType()); + } else { + // Check to see if we have this union type already. + bool Exists; + LLVMContextImpl::UnionConstantsTy::MapTy::iterator I = + pImpl->UnionConstants.InsertOrGetItem(Lookup, Exists); + + if (Exists) { + Replacement = I->second; + } else { + // Okay, the new shape doesn't exist in the system yet. Instead of + // creating a new constant union, inserting it, replaceallusesof'ing the + // old with the new, then deleting the old... just update the current one + // in place! + pImpl->UnionConstants.MoveConstantToNewSlot(this, I); + + // Update to the new value. + setOperand(0, ToC); + return; + } + } + + assert(Replacement != this && "I didn't contain From!"); + + // Everyone using this now uses the replacement. + uncheckedReplaceAllUsesWith(Replacement); + + // Delete the old constant! 
+ destroyConstant(); +} + void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!"); diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index 08224e4..c798ba2 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -341,6 +341,13 @@ struct ConstantTraits< std::vector<T, Alloc> > { } }; +template<> +struct ConstantTraits<Constant *> { + static unsigned uses(Constant * const & v) { + return 1; + } +}; + template<class ConstantClass, class TypeClass, class ValType> struct ConstantCreator { static ConstantClass *create(const TypeClass *Ty, const ValType &V) { @@ -470,6 +477,14 @@ struct ConstantKeyData<ConstantStruct> { } }; +template<> +struct ConstantKeyData<ConstantUnion> { + typedef Constant* ValType; + static ValType getValType(ConstantUnion *CU) { + return cast<Constant>(CU->getOperand(0)); + } +}; + // ConstantPointerNull does not take extra "value" argument... template<class ValType> struct ConstantCreator<ConstantPointerNull, PointerType, ValType> { diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 1755cd2..f4f65c5 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -55,6 +55,15 @@ void LLVMContextDispose(LLVMContextRef C) { delete unwrap(C); } +unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name, + unsigned SLen) { + return unwrap(C)->getMDKindID(StringRef(Name, SLen)); +} + +unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) { + return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen); +} + /*===-- Operations on modules ---------------------------------------------===*/ @@ -141,6 +150,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMFunctionTypeKind; case Type::StructTyID: return LLVMStructTypeKind; + case Type::UnionTyID: + return LLVMUnionTypeKind; case Type::ArrayTyID: return LLVMArrayTypeKind; case Type::PointerTyID: @@ -299,6 +310,35 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap<StructType>(StructTy)->isPacked(); } +/*--.. Operations on union types ..........................................--*/ + +LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, + unsigned ElementCount) { + SmallVector<const Type*, 8> Tys; + for (LLVMTypeRef *I = ElementTypes, + *E = ElementTypes + ElementCount; I != E; ++I) + Tys.push_back(unwrap(*I)); + + return wrap(UnionType::get(&Tys[0], Tys.size())); +} + +LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, + unsigned ElementCount, int Packed) { + return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes, + ElementCount); +} + +unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy) { + return unwrap<UnionType>(UnionTy)->getNumElements(); +} + +void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest) { + UnionType *Ty = unwrap<UnionType>(UnionTy); + for (FunctionType::param_iterator I = Ty->element_begin(), + E = Ty->element_end(); I != E; ++I) + *Dest++ = wrap(*I); +} + /*--.. 
Operations on array, pointer, and vector types (sequence types) .....--*/ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { @@ -394,6 +434,18 @@ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) { unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal)); } +int LLVMHasMetadata(LLVMValueRef Inst) { + return unwrap<Instruction>(Inst)->hasMetadata(); +} + +LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) { + return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID)); +} + +void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) { + unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL); +} + /*--.. Conversion functions ................................................--*/ #define LLVM_DEFINE_VALUE_CAST(name) \ @@ -404,7 +456,7 @@ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) { LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST) /*--.. Operations on Uses ..................................................--*/ -LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) { +LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) { Value *V = unwrap(Val); Value::use_iterator I = V->use_begin(); if (I == V->use_end()) @@ -412,16 +464,19 @@ LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) { return wrap(&(I.getUse())); } -LLVMUseIteratorRef LLVMGetNextUse(LLVMUseIteratorRef UR) { - return wrap(unwrap(UR)->getNext()); +LLVMUseRef LLVMGetNextUse(LLVMUseRef U) { + Use *Next = unwrap(U)->getNext(); + if (Next) + return wrap(Next); + return 0; } -LLVMValueRef LLVMGetUser(LLVMUseIteratorRef UR) { - return wrap(unwrap(UR)->getUser()); +LLVMValueRef LLVMGetUser(LLVMUseRef U) { + return wrap(unwrap(U)->getUser()); } -LLVMValueRef LLVMGetUsedValue(LLVMUseIteratorRef UR) { - return wrap(unwrap(UR)->get()); +LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) { + return wrap(unwrap(U)->get()); } /*--.. Operations on Users .................................................--*/ @@ -462,6 +517,26 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) { wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty))); } +/*--.. Operations on metadata nodes ........................................--*/ + +LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str, + unsigned SLen) { + return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen))); +} + +LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) { + return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen); +} + +LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, + unsigned Count) { + return wrap(MDNode::get(*unwrap(C), unwrap<Value>(Vals, Count), Count)); +} + +LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) { + return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count); +} + /*--.. Operations on scalar constants ......................................--*/ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, @@ -536,11 +611,13 @@ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count, Packed); } - LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) { return wrap(ConstantVector::get( unwrap<Constant>(ScalarConstantVals, Size), Size)); } +LLVMValueRef LLVMConstUnion(LLVMTypeRef Ty, LLVMValueRef Val) { + return wrap(ConstantUnion::get(unwrap<UnionType>(Ty), unwrap<Constant>(Val))); +} /*--.. 
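The metadata accessors and constructors added above compose as follows. A minimal illustrative sketch, not part of the patch, where Inst is any instruction and "my.tag" is an arbitrary kind name chosen for the example:

// Attach an MDNode to an instruction through the new C bindings.
#include "llvm-c/Core.h"

void tagInstruction(LLVMContextRef Ctx, LLVMValueRef Inst) {
  unsigned Kind = LLVMGetMDKindIDInContext(Ctx, "my.tag", 6);
  LLVMValueRef Ops[1] = { LLVMMDStringInContext(Ctx, "hello", 5) };
  LLVMValueRef Node = LLVMMDNodeInContext(Ctx, Ops, 1);

  LLVMSetMetadata(Inst, Kind, Node);       // attaches !my.tag !{!"hello"}
  if (LLVMHasMetadata(Inst))
    (void)LLVMGetMetadata(Inst, Kind);     // returns the node just attached
}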
Constant expressions ................................................--*/ @@ -561,6 +638,17 @@ LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) { unwrap<Constant>(ConstantVal))); } +LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) { + return wrap(ConstantExpr::getNSWNeg( + unwrap<Constant>(ConstantVal))); +} + +LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) { + return wrap(ConstantExpr::getNUWNeg( + unwrap<Constant>(ConstantVal))); +} + + LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) { return wrap(ConstantExpr::getFNeg( unwrap<Constant>(ConstantVal))); @@ -584,6 +672,13 @@ LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant, unwrap<Constant>(RHSConstant))); } +LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNUWAdd( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFAdd( unwrap<Constant>(LHSConstant), @@ -596,6 +691,20 @@ LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { unwrap<Constant>(RHSConstant))); } +LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNSWSub( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + +LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNUWSub( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant), unwrap<Constant>(RHSConstant))); @@ -607,6 +716,20 @@ LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { unwrap<Constant>(RHSConstant))); } +LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNSWMul( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + +LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return wrap(ConstantExpr::getNUWMul( + unwrap<Constant>(LHSConstant), + unwrap<Constant>(RHSConstant))); +} + LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFMul( unwrap<Constant>(LHSConstant), @@ -893,6 +1016,10 @@ LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, Constraints, HasSideEffects, IsAlignStack)); } +LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) { + return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB))); +} + /*--.. 
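A minimal illustrative sketch, not part of the patch, of the no-wrap constant-expression helpers added above:

#include "llvm-c/Core.h"

void noWrapConstants(LLVMContextRef Ctx) {
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMValueRef A = LLVMConstInt(I32, 7, 0);
  LLVMValueRef B = LLVMConstInt(I32, 9, 0);

  LLVMValueRef Sum  = LLVMConstNSWAdd(A, B);   // add nsw
  LLVMValueRef Diff = LLVMConstNUWSub(A, B);   // sub nuw
  LLVMValueRef Prod = LLVMConstNSWMul(A, B);   // mul nsw
  LLVMValueRef Neg  = LLVMConstNUWNeg(A);      // 0 - A, nuw
  (void)Sum; (void)Diff; (void)Prod; (void)Neg;
}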
Operations on global variables, functions, and aliases (globals) ....--*/ LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) { @@ -1029,6 +1156,14 @@ LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) { GlobalValue::ExternalLinkage, 0, Name)); } +LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty, + const char *Name, + unsigned AddressSpace) { + return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false, + GlobalValue::ExternalLinkage, 0, Name, 0, + false, AddressSpace)); +} + LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) { return wrap(unwrap(M)->getNamedGlobal(Name)); } @@ -1184,14 +1319,14 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) { void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Function *Func = unwrap<Function>(Fn); const AttrListPtr PAL = Func->getAttributes(); - const AttrListPtr PALnew = PAL.addAttr(0, PA); + const AttrListPtr PALnew = PAL.addAttr(~0U, PA); Func->setAttributes(PALnew); } void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) { Function *Func = unwrap<Function>(Fn); const AttrListPtr PAL = Func->getAttributes(); - const AttrListPtr PALnew = PAL.removeAttr(0, PA); + const AttrListPtr PALnew = PAL.removeAttr(~0U, PA); Func->setAttributes(PALnew); } @@ -1532,6 +1667,21 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) { delete unwrap(Builder); } +/*--.. Metadata builders ...................................................--*/ + +void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) { + unwrap(Builder)->SetCurrentDebugLocation(L? unwrap<MDNode>(L) : NULL); +} + +LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) { + return wrap(unwrap(Builder)->getCurrentDebugLocation()); +} + +void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) { + unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst)); +} + + /*--.. Instruction builders ................................................--*/ LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) { @@ -1561,6 +1711,11 @@ LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V, return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases)); } +LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr, + unsigned NumDests) { + return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests)); +} + LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, @@ -1583,6 +1738,10 @@ void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal, unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest)); } +void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) { + unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest)); +} + /*--.. 
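LLVMBuildIndirectBr and LLVMAddDestination above pair with LLVMBlockAddress from the previous hunk. A minimal illustrative sketch, not part of the patch, assuming F is a function containing the block Target and B is a builder positioned at the end of one of F's blocks:

#include "llvm-c/Core.h"

void emitIndirectBranch(LLVMBuilderRef B, LLVMValueRef F,
                        LLVMBasicBlockRef Target) {
  LLVMValueRef Addr = LLVMBlockAddress(F, Target);      // blockaddress(@F, %Target)
  LLVMValueRef IBr  = LLVMBuildIndirectBr(B, Addr, 1);  // reserve one destination slot
  LLVMAddDestination(IBr, Target);                      // record Target as a successor
}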
Arithmetic ..........................................................--*/ LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, @@ -1595,6 +1754,11 @@ LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RH return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name)); +} + LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name)); @@ -1605,6 +1769,16 @@ LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name)); +} + +LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name)); +} + LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name)); @@ -1615,6 +1789,16 @@ LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name)); +} + +LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name)); +} + LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name)); @@ -1685,10 +1869,27 @@ LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name)); } +LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op, + LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name) { + return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(Op), unwrap(LHS), + unwrap(RHS), Name)); +} + LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateNeg(unwrap(V), Name)); } +LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V, + const char *Name) { + return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name)); +} + +LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V, + const char *Name) { + return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name)); +} + LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name)); } @@ -1856,6 +2057,12 @@ LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val, Name)); } +LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name) { + return wrap(unwrap(B)->CreateCast(Instruction::CastOps(Op), unwrap(Val), + unwrap(DestTy), Name)); +} + LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return 
wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name)); diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index f00f6ee..dbc283e 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -73,35 +73,35 @@ unsigned Argument::getArgNo() const { /// hasByValAttr - Return true if this argument has the byval attribute on it /// in its containing function. bool Argument::hasByValAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal); } /// hasNestAttr - Return true if this argument has the nest attribute on /// it in its containing function. bool Argument::hasNestAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest); } /// hasNoAliasAttr - Return true if this argument has the noalias attribute on /// it in its containing function. bool Argument::hasNoAliasAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias); } /// hasNoCaptureAttr - Return true if this argument has the nocapture attribute /// on it in its containing function. bool Argument::hasNoCaptureAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture); } /// hasSRetAttr - Return true if this argument has the sret attribute on /// it in its containing function. bool Argument::hasStructRetAttr() const { - if (!isa<PointerType>(getType())) return false; + if (!getType()->isPointerTy()) return false; if (this != getParent()->arg_begin()) return false; // StructRet param must be first param return getParent()->paramHasAttr(1, Attribute::StructRet); @@ -155,7 +155,7 @@ Function::Function(const FunctionType *Ty, LinkageTypes Linkage, : GlobalValue(PointerType::getUnqual(Ty), Value::FunctionVal, 0, 0, Linkage, name) { assert(FunctionType::isValidReturnType(getReturnType()) && - !isa<OpaqueType>(getReturnType()) && "invalid return type"); + !getReturnType()->isOpaqueTy() && "invalid return type"); SymTab = new ValueSymbolTable(); // If the function has arguments, mark them as lazily built. diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index f149c44..489ec65 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -44,10 +44,10 @@ static bool removeDeadUsersOfConstant(const Constant *C) { } bool GlobalValue::isMaterializable() const { - return getParent()->isMaterializable(this); + return getParent() && getParent()->isMaterializable(this); } bool GlobalValue::isDematerializable() const { - return getParent()->isDematerializable(this); + return getParent() && getParent()->isDematerializable(this); } bool GlobalValue::Materialize(std::string *ErrInfo) { return getParent()->Materialize(this, ErrInfo); diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 4bc3cbb..9f2786e 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -19,7 +19,7 @@ using namespace llvm; /// CreateGlobalString - Make a new global variable with an initializer that -/// has array of i8 type filled in the nul terminated string value +/// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. 
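Returning to the no-wrap arithmetic builders and LLVMBuildBinOp added in Core.cpp just above, a minimal illustrative sketch, not part of the patch, assuming the LLVMOpcode enumeration (LLVMAdd here) from llvm-c/Core.h:

#include "llvm-c/Core.h"

void noWrapArith(LLVMBuilderRef B, LLVMValueRef L, LLVMValueRef R) {
  LLVMValueRef Sum  = LLVMBuildNUWAdd(B, L, R, "sum");    // add nuw
  LLVMValueRef Prod = LLVMBuildNSWMul(B, L, R, "prod");   // mul nsw
  // LLVMBuildBinOp lets the opcode be chosen at run time.
  LLVMValueRef Gen  = LLVMBuildBinOp(B, LLVMAdd, Sum, Prod, "gen");
  (void)Gen;
}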
Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) { diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp index ec21773..6355834 100644 --- a/lib/VMCore/InlineAsm.cpp +++ b/lib/VMCore/InlineAsm.cpp @@ -220,7 +220,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) { if (!Ty->getReturnType()->isVoidTy()) return false; break; case 1: - if (isa<StructType>(Ty->getReturnType())) return false; + if (Ty->getReturnType()->isStructTy()) return false; break; default: const StructType *STy = dyn_cast<StructType>(Ty->getReturnType()); diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index 4ec8295..8f4763f 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -562,7 +562,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore, BasicBlock *InsertAtEnd) { assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) && "createFree needs either InsertBefore or InsertAtEnd"); - assert(isa<PointerType>(Source->getType()) && + assert(Source->getType()->isPointerTy() && "Can not free something of nonpointer type!"); BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd; @@ -787,7 +787,7 @@ BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const { void BranchInst::AssertOK() { if (isConditional()) - assert(getCondition()->getType()->isInteger(1) && + assert(getCondition()->getType()->isIntegerTy(1) && "May only branch on boolean predicates!"); } @@ -892,7 +892,7 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) { else { assert(!isa<BasicBlock>(Amt) && "Passed basic block into allocation size parameter! Use other ctor"); - assert(Amt->getType()->isInteger(32) && + assert(Amt->getType()->isIntegerTy(32) && "Allocation array size is not a 32-bit integer!"); } return Amt; @@ -989,7 +989,7 @@ bool AllocaInst::isStaticAlloca() const { //===----------------------------------------------------------------------===// void LoadInst::AssertOK() { - assert(isa<PointerType>(getOperand(0)->getType()) && + assert(getOperand(0)->getType()->isPointerTy() && "Ptr must have pointer type."); } @@ -1103,7 +1103,7 @@ void LoadInst::setAlignment(unsigned Align) { void StoreInst::AssertOK() { assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!"); - assert(isa<PointerType>(getOperand(1)->getType()) && + assert(getOperand(1)->getType()->isPointerTy() && "Ptr must have pointer type!"); assert(getOperand(0)->getType() == cast<PointerType>(getOperand(1)->getType())->getElementType() @@ -1285,7 +1285,7 @@ static const Type* getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs, unsigned CurIdx = 1; for (; CurIdx != NumIdx; ++CurIdx) { const CompositeType *CT = dyn_cast<CompositeType>(Agg); - if (!CT || isa<PointerType>(CT)) return 0; + if (!CT || CT->isPointerTy()) return 0; IndexTy Index = Idxs[CurIdx]; if (!CT->indexValid(Index)) return 0; Agg = CT->getTypeAtIndex(Index); @@ -1391,7 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { - if (!isa<VectorType>(Val->getType()) || !Index->getType()->isInteger(32)) + if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32)) return false; return true; } @@ -1432,13 +1432,13 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index, bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, const Value *Index) { - if 
(!isa<VectorType>(Vec->getType())) + if (!Vec->getType()->isVectorTy()) return false; // First operand of insertelement must be vector type. if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType()) return false;// Second operand of insertelement must be vector element type. - if (!Index->getType()->isInteger(32)) + if (!Index->getType()->isIntegerTy(32)) return false; // Third operand of insertelement must be i32. return true; } @@ -1485,12 +1485,12 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, const Value *Mask) { - if (!isa<VectorType>(V1->getType()) || V1->getType() != V2->getType()) + if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType()) return false; const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType()); if (!isa<Constant>(Mask) || MaskTy == 0 || - !MaskTy->getElementType()->isInteger(32)) + !MaskTy->getElementType()->isIntegerTy(32)) return false; return true; } @@ -1602,7 +1602,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, unsigned CurIdx = 0; for (; CurIdx != NumIdx; ++CurIdx) { const CompositeType *CT = dyn_cast<CompositeType>(Agg); - if (!CT || isa<PointerType>(CT) || isa<VectorType>(CT)) return 0; + if (!CT || CT->isPointerTy() || CT->isVectorTy()) return 0; unsigned Index = Idxs[CurIdx]; if (!CT->indexValid(Index)) return 0; Agg = CT->getTypeAtIndex(Index); @@ -1632,7 +1632,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType, const Type *Ty) { // API compatibility: Adjust integer opcodes to floating-point opcodes. - if (Ty->isFPOrFPVector()) { + if (Ty->isFPOrFPVectorTy()) { if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd; else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub; else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul; @@ -1678,14 +1678,14 @@ void BinaryOperator::init(BinaryOps iType) { case Mul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isIntOrIntVector() && + assert(getType()->isIntOrIntVectorTy() && "Tried to create an integer operation on a non-integer type!"); break; case FAdd: case FSub: case FMul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Tried to create a floating-point operation on a " "non-floating-point type!"); break; @@ -1693,28 +1693,28 @@ void BinaryOperator::init(BinaryOps iType) { case SDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || (getType()->isVectorTy() && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Incorrect operand type (not integer) for S/UDIV"); break; case FDiv: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FDIV"); break; case URem: case SRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || 
(isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || (getType()->isVectorTy() && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Incorrect operand type (not integer) for S/UREM"); break; case FRem: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert(getType()->isFPOrFPVector() && + assert(getType()->isFPOrFPVectorTy() && "Incorrect operand type (not floating point) for FREM"); break; case Shl: @@ -1722,18 +1722,18 @@ void BinaryOperator::init(BinaryOps iType) { case AShr: assert(getType() == LHS->getType() && "Shift operation should return same type as operands!"); - assert((getType()->isInteger() || - (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || + (getType()->isVectorTy() && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Tried to create a shift operation on a non-integral type!"); break; case And: case Or: case Xor: assert(getType() == LHS->getType() && "Logical operation should return same type as operands!"); - assert((getType()->isInteger() || - (isa<VectorType>(getType()) && - cast<VectorType>(getType())->getElementType()->isInteger())) && + assert((getType()->isIntegerTy() || + (getType()->isVectorTy() && + cast<VectorType>(getType())->getElementType()->isIntegerTy())) && "Tried to create a logical operation on a non-integral type!"); break; default: @@ -1960,7 +1960,8 @@ bool CastInst::isIntegerCast() const { case Instruction::Trunc: return true; case Instruction::BitCast: - return getOperand(0)->getType()->isInteger() && getType()->isInteger(); + return getOperand(0)->getType()->isIntegerTy() && + getType()->isIntegerTy(); } } @@ -1976,8 +1977,8 @@ bool CastInst::isLosslessCast() const { return true; // Pointer to pointer is always lossless. - if (isa<PointerType>(SrcTy)) - return isa<PointerType>(DstTy); + if (SrcTy->isPointerTy()) + return DstTy->isPointerTy(); return false; // Other types have no identity values } @@ -2093,25 +2094,25 @@ unsigned CastInst::isEliminableCastPair( // no-op cast in second op implies firstOp as long as the DestTy // is integer and we are not converting between a vector and a // non vector type. - if (!isa<VectorType>(SrcTy) && DstTy->isInteger()) + if (!SrcTy->isVectorTy() && DstTy->isIntegerTy()) return firstOp; return 0; case 4: // no-op cast in second op implies firstOp as long as the DestTy // is floating point. - if (DstTy->isFloatingPoint()) + if (DstTy->isFloatingPointTy()) return firstOp; return 0; case 5: // no-op cast in first op implies secondOp as long as the SrcTy // is an integer. - if (SrcTy->isInteger()) + if (SrcTy->isIntegerTy()) return secondOp; return 0; case 6: // no-op cast in first op implies secondOp as long as the SrcTy // is a floating point. - if (SrcTy->isFloatingPoint()) + if (SrcTy->isFloatingPointTy()) return secondOp; return 0; case 7: { @@ -2147,12 +2148,12 @@ unsigned CastInst::isEliminableCastPair( case 11: // bitcast followed by ptrtoint is allowed as long as the bitcast // is a pointer to pointer cast. 
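The isValidOperands helpers touched earlier in Instructions.cpp are useful as pre-checks before constructing vector instructions. A minimal illustrative sketch, not part of the patch:

#include "llvm/Instructions.h"
using namespace llvm;

Instruction *makeExtract(Value *Vec, Value *Idx, Instruction *InsertBefore) {
  // Vec must be a vector and Idx an i32, per ExtractElementInst::isValidOperands.
  if (!ExtractElementInst::isValidOperands(Vec, Idx))
    return 0;
  return ExtractElementInst::Create(Vec, Idx, "elt", InsertBefore);
}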
- if (isa<PointerType>(SrcTy) && isa<PointerType>(MidTy)) + if (SrcTy->isPointerTy() && MidTy->isPointerTy()) return secondOp; return 0; case 12: // inttoptr, bitcast -> intptr if bitcast is a ptr to ptr cast - if (isa<PointerType>(MidTy) && isa<PointerType>(DstTy)) + if (MidTy->isPointerTy() && DstTy->isPointerTy()) return firstOp; return 0; case 13: { @@ -2273,11 +2274,11 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty, CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && + assert(S->getType()->isPointerTy() && "Invalid cast"); + assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd); return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); } @@ -2286,11 +2287,11 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, const Twine &Name, Instruction *InsertBefore) { - assert(isa<PointerType>(S->getType()) && "Invalid cast"); - assert((Ty->isInteger() || isa<PointerType>(Ty)) && + assert(S->getType()->isPointerTy() && "Invalid cast"); + assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast"); - if (Ty->isInteger()) + if (Ty->isIntegerTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore); return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); } @@ -2298,7 +2299,7 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid integer cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2312,7 +2313,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, bool isSigned, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() && + assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2326,7 +2327,7 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const Twine &Name, Instruction *InsertBefore) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2339,7 +2340,7 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() && + assert(C->getType()->isFPOrFPVectorTy() && Ty->isFPOrFPVectorTy() && "Invalid cast"); unsigned SrcBits = C->getType()->getScalarSizeInBits(); unsigned DstBits = Ty->getScalarSizeInBits(); @@ -2363,21 +2364,21 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { unsigned 
DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr // Run through the possibilities ... - if (DestTy->isInteger()) { // Casting to integral - if (SrcTy->isInteger()) { // Casting from integral + if (DestTy->isIntegerTy()) { // Casting to integral + if (SrcTy->isIntegerTy()) { // Casting from integral return true; - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt return true; } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector return DestBits == PTy->getBitWidth(); } else { // Casting from something else - return isa<PointerType>(SrcTy); + return SrcTy->isPointerTy(); } - } else if (DestTy->isFloatingPoint()) { // Casting to floating pt - if (SrcTy->isInteger()) { // Casting from integral + } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) { // Casting from integral return true; - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt return true; } else if (const VectorType *PTy = dyn_cast<VectorType>(SrcTy)) { // Casting from vector @@ -2393,10 +2394,10 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { } else { // Casting from something else return DestPTy->getBitWidth() == SrcBits; } - } else if (isa<PointerType>(DestTy)) { // Casting to pointer - if (isa<PointerType>(SrcTy)) { // Casting from pointer + } else if (DestTy->isPointerTy()) { // Casting to pointer + if (SrcTy->isPointerTy()) { // Casting from pointer return true; - } else if (SrcTy->isInteger()) { // Casting from integral + } else if (SrcTy->isIntegerTy()) { // Casting from integral return true; } else { // Casting from something else return false; @@ -2425,8 +2426,8 @@ CastInst::getCastOpcode( "Only first class types are castable!"); // Run through the possibilities ... 
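A minimal illustrative sketch, not part of the patch, of the cast factories above, which pick the concrete opcode from the operand and destination types:

#include "llvm/InstrTypes.h"
using namespace llvm;

Value *castPointer(Value *Ptr, const Type *DestTy, Instruction *InsertBefore) {
  // Ptr must be pointer-typed; DestTy must be an integer or pointer type.
  // CreatePointerCast emits ptrtoint for integer destinations, bitcast otherwise.
  return CastInst::CreatePointerCast(Ptr, DestTy, "cast", InsertBefore);
}

Value *signedResize(Value *V, const Type *DestTy, Instruction *InsertBefore) {
  // CreateIntegerCast picks trunc, sext, or bitcast from the two bit widths.
  return CastInst::CreateIntegerCast(V, DestTy, /*isSigned=*/true, "resize",
                                     InsertBefore);
}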
- if (DestTy->isInteger()) { // Casting to integral - if (SrcTy->isInteger()) { // Casting from integral + if (DestTy->isIntegerTy()) { // Casting to integral + if (SrcTy->isIntegerTy()) { // Casting from integral if (DestBits < SrcBits) return Trunc; // int -> smaller int else if (DestBits > SrcBits) { // its an extension @@ -2437,7 +2438,7 @@ CastInst::getCastOpcode( } else { return BitCast; // Same size, No-op cast } - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt if (DestIsSigned) return FPToSI; // FP -> sint else @@ -2448,17 +2449,17 @@ CastInst::getCastOpcode( PTy = NULL; return BitCast; // Same size, no-op cast } else { - assert(isa<PointerType>(SrcTy) && + assert(SrcTy->isPointerTy() && "Casting from a value that is not first-class type"); return PtrToInt; // ptr -> int } - } else if (DestTy->isFloatingPoint()) { // Casting to floating pt - if (SrcTy->isInteger()) { // Casting from integral + } else if (DestTy->isFloatingPointTy()) { // Casting to floating pt + if (SrcTy->isIntegerTy()) { // Casting from integral if (SrcIsSigned) return SIToFP; // sint -> FP else return UIToFP; // uint -> FP - } else if (SrcTy->isFloatingPoint()) { // Casting from floating pt + } else if (SrcTy->isFloatingPointTy()) { // Casting from floating pt if (DestBits < SrcBits) { return FPTrunc; // FP -> smaller FP } else if (DestBits > SrcBits) { @@ -2485,10 +2486,10 @@ CastInst::getCastOpcode( } else { assert(!"Illegal cast to vector (wrong type or size)"); } - } else if (isa<PointerType>(DestTy)) { - if (isa<PointerType>(SrcTy)) { + } else if (DestTy->isPointerTy()) { + if (SrcTy->isPointerTy()) { return BitCast; // ptr -> ptr - } else if (SrcTy->isInteger()) { + } else if (SrcTy->isIntegerTy()) { return IntToPtr; // int -> ptr } else { assert(!"Casting pointer to other than pointer or int"); @@ -2528,50 +2529,50 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) { switch (op) { default: return false; // This is an input error case Instruction::Trunc: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize > DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize > DstBitSize; case Instruction::ZExt: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize; case Instruction::SExt: - return SrcTy->isIntOrIntVector() && - DstTy->isIntOrIntVector()&& SrcBitSize < DstBitSize; + return SrcTy->isIntOrIntVectorTy() && + DstTy->isIntOrIntVectorTy()&& SrcBitSize < DstBitSize; case Instruction::FPTrunc: - return SrcTy->isFPOrFPVector() && - DstTy->isFPOrFPVector() && + return SrcTy->isFPOrFPVectorTy() && + DstTy->isFPOrFPVectorTy() && SrcBitSize > DstBitSize; case Instruction::FPExt: - return SrcTy->isFPOrFPVector() && - DstTy->isFPOrFPVector() && + return SrcTy->isFPOrFPVectorTy() && + DstTy->isFPOrFPVectorTy() && SrcBitSize < DstBitSize; case Instruction::UIToFP: case Instruction::SIToFP: if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) { if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) { - return SVTy->getElementType()->isIntOrIntVector() && - DVTy->getElementType()->isFPOrFPVector() && + return SVTy->getElementType()->isIntOrIntVectorTy() && + DVTy->getElementType()->isFPOrFPVectorTy() && SVTy->getNumElements() == DVTy->getNumElements(); } } - return SrcTy->isIntOrIntVector() && 
DstTy->isFPOrFPVector(); + return SrcTy->isIntOrIntVectorTy() && DstTy->isFPOrFPVectorTy(); case Instruction::FPToUI: case Instruction::FPToSI: if (const VectorType *SVTy = dyn_cast<VectorType>(SrcTy)) { if (const VectorType *DVTy = dyn_cast<VectorType>(DstTy)) { - return SVTy->getElementType()->isFPOrFPVector() && - DVTy->getElementType()->isIntOrIntVector() && + return SVTy->getElementType()->isFPOrFPVectorTy() && + DVTy->getElementType()->isIntOrIntVectorTy() && SVTy->getNumElements() == DVTy->getNumElements(); } } - return SrcTy->isFPOrFPVector() && DstTy->isIntOrIntVector(); + return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy(); case Instruction::PtrToInt: - return isa<PointerType>(SrcTy) && DstTy->isInteger(); + return SrcTy->isPointerTy() && DstTy->isIntegerTy(); case Instruction::IntToPtr: - return SrcTy->isInteger() && isa<PointerType>(DstTy); + return SrcTy->isIntegerTy() && DstTy->isPointerTy(); case Instruction::BitCast: // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. - if (isa<PointerType>(SrcTy) != isa<PointerType>(DstTy)) + if (SrcTy->isPointerTy() != DstTy->isPointerTy()) return false; // Now we know we're not dealing with a pointer/non-pointer mismatch. In all @@ -3149,7 +3150,7 @@ void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { //===----------------------------------------------------------------------===// void IndirectBrInst::init(Value *Address, unsigned NumDests) { - assert(Address && isa<PointerType>(Address->getType()) && + assert(Address && Address->getType()->isPointerTy() && "Address of indirectbr must be a pointer"); ReservedSpace = 1+NumDests; NumOperands = 1; diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index ccca789..9887f28 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -116,6 +116,10 @@ public: ConstantStruct, true /*largekey*/> StructConstantsTy; StructConstantsTy StructConstants; + typedef ConstantUniqueMap<Constant*, UnionType, ConstantUnion> + UnionConstantsTy; + UnionConstantsTy UnionConstants; + typedef ConstantUniqueMap<std::vector<Constant*>, VectorType, ConstantVector> VectorConstantsTy; VectorConstantsTy VectorConstants; @@ -159,12 +163,16 @@ public: TypeMap<PointerValType, PointerType> PointerTypes; TypeMap<FunctionValType, FunctionType> FunctionTypes; TypeMap<StructValType, StructType> StructTypes; + TypeMap<UnionValType, UnionType> UnionTypes; TypeMap<IntegerValType, IntegerType> IntegerTypes; // Opaque types are not structurally uniqued, so don't use TypeMap. typedef SmallPtrSet<const OpaqueType*, 8> OpaqueTypesTy; OpaqueTypesTy OpaqueTypes; - + + /// Used as an abstract type that will never be resolved. + OpaqueType *const AlwaysOpaqueTy; + /// ValueHandles - This map keeps track of all of the value handles that are /// watching a Value*. The Value::HasValueHandle bit is used to know @@ -196,7 +204,12 @@ public: Int8Ty(C, 8), Int16Ty(C, 16), Int32Ty(C, 32), - Int64Ty(C, 64) { } + Int64Ty(C, 64), + AlwaysOpaqueTy(new OpaqueType(C)) { + // Make sure the AlwaysOpaqueTy stays alive as long as the Context. 
+ AlwaysOpaqueTy->addRef(); + OpaqueTypes.insert(AlwaysOpaqueTy); + } ~LLVMContextImpl() { ExprConstants.freeConstants(); @@ -216,12 +229,28 @@ public: if (I->second->use_empty()) delete I->second; } - MDNodeSet.clear(); + AlwaysOpaqueTy->dropRef(); for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end(); I != E; ++I) { (*I)->AbstractTypeUsers.clear(); delete *I; } + // Destroy MDNode operands first. + for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end(); + I != E;) { + MDNode *N = &(*I); + ++I; + N->replaceAllOperandsWithNull(); + } + while (!MDNodeSet.empty()) { + MDNode *N = &(*MDNodeSet.begin()); + N->destroy(); + } + // Destroy MDStrings. + for (StringMap<MDString*>::iterator I = MDStringCache.begin(), + E = MDStringCache.end(); I != E; ++I) { + delete I->second; + } } }; diff --git a/lib/VMCore/Makefile b/lib/VMCore/Makefile index bc5e77d..4395ecf 100644 --- a/lib/VMCore/Makefile +++ b/lib/VMCore/Makefile @@ -30,5 +30,5 @@ $(GENFILE): $(ObjDir)/Intrinsics.gen.tmp changed significantly. ) install-local:: $(GENFILE) - $(Echo) Installing $(PROJ_includedir)/llvm/Intrinsics.gen - $(Verb) $(DataInstall) $(GENFILE) $(PROJ_includedir)/llvm/Intrinsics.gen + $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen + $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/Intrinsics.gen diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 07a5f3c..a08c454 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -257,6 +257,13 @@ void MDNode::Profile(FoldingSetNodeID &ID) const { ID.AddPointer(getOperand(i)); } +// replaceAllOperandsWithNull - This is used while destroying llvm context to +// gracefully delete all nodes. This method replaces all operands with null. +void MDNode::replaceAllOperandsWithNull() { + for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands; + Op != E; ++Op) + replaceOperand(Op, 0); +} // Replace value from this node's operand list. void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp index 2b0b235..a782e5a 100644 --- a/lib/VMCore/Pass.cpp +++ b/lib/VMCore/Pass.cpp @@ -194,6 +194,9 @@ PassManagerType BasicBlockPass::getPotentialPassManagerType() const { // namespace { class PassRegistrar { + /// Guards the contents of this class. + mutable sys::SmartMutex<true> Lock; + /// PassInfoMap - Keep track of the passinfo object for each registered llvm /// pass. typedef std::map<intptr_t, const PassInfo*> MapType; @@ -213,16 +216,19 @@ class PassRegistrar { public: const PassInfo *GetPassInfo(intptr_t TI) const { + sys::SmartScopedLock<true> Guard(Lock); MapType::const_iterator I = PassInfoMap.find(TI); return I != PassInfoMap.end() ? I->second : 0; } const PassInfo *GetPassInfo(StringRef Arg) const { + sys::SmartScopedLock<true> Guard(Lock); StringMapType::const_iterator I = PassInfoStringMap.find(Arg); return I != PassInfoStringMap.end() ? 
I->second : 0; } void RegisterPass(const PassInfo &PI) { + sys::SmartScopedLock<true> Guard(Lock); bool Inserted = PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second; assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted; @@ -230,6 +236,7 @@ public: } void UnregisterPass(const PassInfo &PI) { + sys::SmartScopedLock<true> Guard(Lock); MapType::iterator I = PassInfoMap.find(PI.getTypeInfo()); assert(I != PassInfoMap.end() && "Pass registered but not in map!"); @@ -239,6 +246,7 @@ public: } void EnumerateWith(PassRegistrationListener *L) { + sys::SmartScopedLock<true> Guard(Lock); for (MapType::const_iterator I = PassInfoMap.begin(), E = PassInfoMap.end(); I != E; ++I) L->passEnumerate(I->second); @@ -249,6 +257,7 @@ public: void RegisterAnalysisGroup(PassInfo *InterfaceInfo, const PassInfo *ImplementationInfo, bool isDefault) { + sys::SmartScopedLock<true> Guard(Lock); AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo]; assert(AGI.Implementations.count(ImplementationInfo) == 0 && "Cannot add a pass to the same analysis group more than once!"); diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index a1d554e..c4dfe14 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -1118,6 +1118,7 @@ bool BBPassManager::runOnFunction(Function &F) { for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { BasicBlockPass *BP = getContainedPass(Index); + bool LocalChanged = false; dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName()); dumpRequiredSet(BP); @@ -1129,11 +1130,12 @@ bool BBPassManager::runOnFunction(Function &F) { PassManagerPrettyStackEntry X(BP, *I); Timer *T = StartPassTimer(BP); - Changed |= BP->runOnBasicBlock(*I); + LocalChanged |= BP->runOnBasicBlock(*I); StopPassTimer(BP, T); } - if (Changed) + Changed |= LocalChanged; + if (LocalChanged) dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG, I->getName()); dumpPreservedSet(BP); @@ -1220,9 +1222,11 @@ void FunctionPassManager::add(Pass *P) { /// so, return true. 
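The per-pass LocalChanged flag introduced above matters only for reporting: with a single accumulated flag, once any pass changes the IR every later pass is also dumped as a modifier. A schematic sketch, not real pass-manager code; runOne and reportModification are hypothetical stand-ins for the pass execution and dumpPassInfo machinery:

#include <cstdio>
#include <vector>

static bool runOne(int PassId) { return PassId % 2 == 0; }  // hypothetical
static void reportModification(int PassId) {                // hypothetical
  std::printf("pass %d modified the IR\n", PassId);
}

static bool runAll(const std::vector<int> &Passes) {
  bool Changed = false;
  for (size_t i = 0; i != Passes.size(); ++i) {
    bool LocalChanged = runOne(Passes[i]);  // track this pass only
    Changed |= LocalChanged;
    if (LocalChanged)                       // the old code tested the accumulated flag,
      reportModification(Passes[i]);        // crediting later passes with earlier changes
  }
  return Changed;
}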
/// bool FunctionPassManager::run(Function &F) { - std::string errstr; - if (F.Materialize(&errstr)) { - llvm_report_error("Error reading bitcode file: " + errstr); + if (F.isMaterializable()) { + std::string errstr; + if (F.Materialize(&errstr)) { + llvm_report_error("Error reading bitcode file: " + errstr); + } } return FPM->run(F); } @@ -1332,6 +1336,7 @@ bool FPPassManager::runOnFunction(Function &F) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { FunctionPass *FP = getContainedPass(Index); + bool LocalChanged = false; dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName()); dumpRequiredSet(FP); @@ -1342,11 +1347,12 @@ bool FPPassManager::runOnFunction(Function &F) { PassManagerPrettyStackEntry X(FP, F); Timer *T = StartPassTimer(FP); - Changed |= FP->runOnFunction(F); + LocalChanged |= FP->runOnFunction(F); StopPassTimer(FP, T); } - if (Changed) + Changed |= LocalChanged; + if (LocalChanged) dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName()); dumpPreservedSet(FP); @@ -1405,6 +1411,7 @@ MPPassManager::runOnModule(Module &M) { for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { ModulePass *MP = getContainedPass(Index); + bool LocalChanged = false; dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier()); dumpRequiredSet(MP); @@ -1414,11 +1421,12 @@ MPPassManager::runOnModule(Module &M) { { PassManagerPrettyStackEntry X(MP, M); Timer *T = StartPassTimer(MP); - Changed |= MP->runOnModule(M); + LocalChanged |= MP->runOnModule(M); StopPassTimer(MP, T); } - if (Changed) + Changed |= LocalChanged; + if (LocalChanged) dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG, M.getModuleIdentifier()); dumpPreservedSet(MP); @@ -1704,8 +1712,13 @@ LLVMPassManagerRef LLVMCreatePassManager() { return wrap(new PassManager()); } +LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) { + return wrap(new FunctionPassManager(unwrap(M))); +} + LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) { - return wrap(new FunctionPassManager(unwrap(P))); + return LLVMCreateFunctionPassManagerForModule( + reinterpret_cast<LLVMModuleRef>(P)); } LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) { diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 044de4f..9b2c2ca 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -50,8 +50,8 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) { /// Because of the way Type subclasses are allocated, this function is necessary /// to use the correct kind of "delete" operator to deallocate the Type object. -/// Some type objects (FunctionTy, StructTy) allocate additional space after -/// the space for their derived type to hold the contained types array of +/// Some type objects (FunctionTy, StructTy, UnionTy) allocate additional space +/// after the space for their derived type to hold the contained types array of /// PATypeHandles. Using this allocation scheme means all the PATypeHandles are /// allocated with the type object, decreasing allocations and eliminating the /// need for a std::vector to be used in the Type class itself. @@ -61,7 +61,8 @@ void Type::destroy() const { // Structures and Functions allocate their contained types past the end of // the type object itself. These need to be destroyed differently than the // other types. 
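A minimal illustrative sketch, not part of the patch, of the module-based constructor added above, which supersedes the ModuleProvider-based LLVMCreateFunctionPassManager:

#include "llvm-c/Core.h"

void setUpFunctionPasses(LLVMModuleRef M) {
  LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
  // ... add function passes here and run them per function ...
  LLVMDisposePassManager(FPM);
}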
- if (isa<FunctionType>(this) || isa<StructType>(this)) { + if (this->isFunctionTy() || this->isStructTy() || + this->isUnionTy()) { // First, make sure we destruct any PATypeHandles allocated by these // subclasses. They must be manually destructed. for (unsigned i = 0; i < NumContainedTys; ++i) @@ -69,10 +70,12 @@ void Type::destroy() const { // Now call the destructor for the subclass directly because we're going // to delete this as an array of char. - if (isa<FunctionType>(this)) + if (this->isFunctionTy()) static_cast<const FunctionType*>(this)->FunctionType::~FunctionType(); - else + else if (this->isStructTy()) static_cast<const StructType*>(this)->StructType::~StructType(); + else + static_cast<const UnionType*>(this)->UnionType::~UnionType(); // Finally, remove the memory as an array deallocation of the chars it was // constructed from. @@ -124,32 +127,32 @@ const Type *Type::getScalarType() const { return this; } -/// isInteger - Return true if this is an IntegerType of the specified width. -bool Type::isInteger(unsigned Bitwidth) const { - return isInteger() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; +/// isIntegerTy - Return true if this is an IntegerType of the specified width. +bool Type::isIntegerTy(unsigned Bitwidth) const { + return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth; } -/// isIntOrIntVector - Return true if this is an integer type or a vector of +/// isIntOrIntVectorTy - Return true if this is an integer type or a vector of /// integer types. /// -bool Type::isIntOrIntVector() const { - if (isInteger()) +bool Type::isIntOrIntVectorTy() const { + if (isIntegerTy()) return true; if (ID != Type::VectorTyID) return false; - return cast<VectorType>(this)->getElementType()->isInteger(); + return cast<VectorType>(this)->getElementType()->isIntegerTy(); } -/// isFPOrFPVector - Return true if this is a FP type or a vector of FP types. +/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types. /// -bool Type::isFPOrFPVector() const { +bool Type::isFPOrFPVectorTy() const { if (ID == Type::FloatTyID || ID == Type::DoubleTyID || ID == Type::FP128TyID || ID == Type::X86_FP80TyID || ID == Type::PPC_FP128TyID) return true; if (ID != Type::VectorTyID) return false; - return cast<VectorType>(this)->getElementType()->isFloatingPoint(); + return cast<VectorType>(this)->getElementType()->isFloatingPointTy(); } // canLosslesslyBitCastTo - Return true if this type can be converted to @@ -173,8 +176,8 @@ bool Type::canLosslesslyBitCastTo(const Type *Ty) const { // At this point we have only various mismatches of the first class types // remaining and ptr->ptr. Just select the lossless conversions. Everything // else is not lossless. - if (isa<PointerType>(this)) - return isa<PointerType>(Ty); + if (this->isPointerTy()) + return Ty->isPointerTy(); return false; // Other types have no identity values } @@ -204,7 +207,7 @@ unsigned Type::getScalarSizeInBits() const { int Type::getFPMantissaWidth() const { if (const VectorType *VTy = dyn_cast<VectorType>(this)) return VTy->getElementType()->getFPMantissaWidth(); - assert(isFloatingPoint() && "Not a floating point type!"); + assert(isFloatingPointTy() && "Not a floating point type!"); if (ID == FloatTyID) return 24; if (ID == DoubleTyID) return 53; if (ID == X86_FP80TyID) return 64; @@ -217,7 +220,7 @@ int Type::getFPMantissaWidth() const { /// iff all of the members of the type are sized as well. 
Since asking for /// their size is relatively uncommon, move this operation out of line. bool Type::isSizedDerivedType() const { - if (isa<IntegerType>(this)) + if (this->isIntegerTy()) return true; if (const ArrayType *ATy = dyn_cast<ArrayType>(this)) @@ -226,7 +229,7 @@ bool Type::isSizedDerivedType() const { if (const VectorType *PTy = dyn_cast<VectorType>(this)) return PTy->getElementType()->isSized(); - if (!isa<StructType>(this)) + if (!this->isStructTy() && !this->isUnionTy()) return false; // Okay, our struct is sized if all of the elements are... @@ -285,7 +288,7 @@ std::string Type::getDescription() const { bool StructType::indexValid(const Value *V) const { // Structure indexes require 32-bit integer constants. - if (V->getType()->isInteger(32)) + if (V->getType()->isIntegerTy(32)) if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) return indexValid(CU->getZExtValue()); return false; @@ -308,6 +311,32 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const { return ContainedTys[Idx]; } + +bool UnionType::indexValid(const Value *V) const { + // Union indexes require 32-bit integer constants. + if (V->getType()->isIntegerTy(32)) + if (const ConstantInt *CU = dyn_cast<ConstantInt>(V)) + return indexValid(CU->getZExtValue()); + return false; +} + +bool UnionType::indexValid(unsigned V) const { + return V < NumContainedTys; +} + +// getTypeAtIndex - Given an index value into the type, return the type of the +// element. For a structure type, this must be a constant value... +// +const Type *UnionType::getTypeAtIndex(const Value *V) const { + unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue(); + return getTypeAtIndex(Idx); +} + +const Type *UnionType::getTypeAtIndex(unsigned Idx) const { + assert(indexValid(Idx) && "Invalid structure index!"); + return ContainedTys[Idx]; +} + //===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// @@ -418,7 +447,7 @@ bool FunctionType::isValidReturnType(const Type *RetTy) { /// isValidArgumentType - Return true if the specified type is valid as an /// argument type. bool FunctionType::isValidArgumentType(const Type *ArgTy) { - return ArgTy->isFirstClassType() || isa<OpaqueType>(ArgTy); + return ArgTy->isFirstClassType() || ArgTy->isOpaqueTy(); } FunctionType::FunctionType(const Type *Result, @@ -463,6 +492,23 @@ StructType::StructType(LLVMContext &C, setAbstract(isAbstract); } +UnionType::UnionType(LLVMContext &C,const Type* const* Types, unsigned NumTypes) + : CompositeType(C, UnionTyID) { + ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1); + NumContainedTys = NumTypes; + bool isAbstract = false; + for (unsigned i = 0; i < NumTypes; ++i) { + assert(Types[i] && "<null> type for union field!"); + assert(isValidElementType(Types[i]) && + "Invalid type for union element!"); + new (&ContainedTys[i]) PATypeHandle(Types[i], this); + isAbstract |= Types[i]->isAbstract(); + } + + // Calculate whether or not this type is abstract + setAbstract(isAbstract); +} + ArrayType::ArrayType(const Type *ElType, uint64_t NumEl) : SequentialType(ArrayTyID, ElType) { NumElements = NumEl; @@ -507,30 +553,7 @@ void DerivedType::dropAllTypeUses() { if (NumContainedTys != 0) { // The type must stay abstract. To do this, we insert a pointer to a type // that will never get resolved, thus will always be abstract. 
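A minimal illustrative sketch, not part of the patch, of the renamed type predicates (isIntegerTy, isIntOrIntVectorTy, isPointerTy, and friends) defined above:

#include "llvm/DerivedTypes.h"
using namespace llvm;

bool isI32OrI32Vector(const Type *Ty) {
  if (Ty->isIntegerTy(32))
    return true;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
    return VTy->getElementType()->isIntegerTy(32);
  return false;
}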
- static Type *AlwaysOpaqueTy = 0; - static PATypeHolder* Holder = 0; - Type *tmp = AlwaysOpaqueTy; - if (llvm_is_multithreaded()) { - sys::MemoryFence(); - if (!tmp) { - llvm_acquire_global_lock(); - tmp = AlwaysOpaqueTy; - if (!tmp) { - tmp = OpaqueType::get(getContext()); - PATypeHolder* tmp2 = new PATypeHolder(tmp); - sys::MemoryFence(); - AlwaysOpaqueTy = tmp; - Holder = tmp2; - } - - llvm_release_global_lock(); - } - } else if (!AlwaysOpaqueTy) { - AlwaysOpaqueTy = OpaqueType::get(getContext()); - Holder = new PATypeHolder(AlwaysOpaqueTy); - } - - ContainedTys[0] = AlwaysOpaqueTy; + ContainedTys[0] = getContext().pImpl->AlwaysOpaqueTy; // Change the rest of the types to be Int32Ty's. It doesn't matter what we // pick so long as it doesn't point back to this type. We choose something @@ -590,7 +613,7 @@ void Type::PromoteAbstractToConcrete() { // Concrete types are leaves in the tree. Since an SCC will either be all // abstract or all concrete, we only need to check one type. if (SCC[0]->isAbstract()) { - if (isa<OpaqueType>(SCC[0])) + if (SCC[0]->isOpaqueTy()) return; // Not going to be concrete, sorry. // If all of the children of all of the types in this SCC are concrete, @@ -637,7 +660,7 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, std::map<const Type *, const Type *> &EqTypes) { if (Ty == Ty2) return true; if (Ty->getTypeID() != Ty2->getTypeID()) return false; - if (isa<OpaqueType>(Ty)) + if (Ty->isOpaqueTy()) return false; // Two unequal opaque types are never equal std::map<const Type*, const Type*>::iterator It = EqTypes.find(Ty); @@ -667,6 +690,13 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2, if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes)) return false; return true; + } else if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) { + const UnionType *UTy2 = cast<UnionType>(Ty2); + if (UTy->getNumElements() != UTy2->getNumElements()) return false; + for (unsigned i = 0, e = UTy2->getNumElements(); i != e; ++i) + if (!TypesEqual(UTy->getElementType(i), UTy2->getElementType(i), EqTypes)) + return false; + return true; } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { const ArrayType *ATy2 = cast<ArrayType>(Ty2); return ATy->getNumElements() == ATy2->getNumElements() && @@ -858,7 +888,7 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) { bool ArrayType::isValidElementType(const Type *ElemTy) { return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID && - ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy); + ElemTy->getTypeID() != MetadataTyID && !ElemTy->isFunctionTy(); } VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { @@ -881,8 +911,8 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) { } bool VectorType::isValidElementType(const Type *ElemTy) { - return ElemTy->isInteger() || ElemTy->isFloatingPoint() || - isa<OpaqueType>(ElemTy); + return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() || + ElemTy->isOpaqueTy(); } //===----------------------------------------------------------------------===// @@ -924,12 +954,66 @@ StructType *StructType::get(LLVMContext &Context, const Type *type, ...) 
{ } bool StructType::isValidElementType(const Type *ElemTy) { - return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID && - ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy); + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); } //===----------------------------------------------------------------------===// +// Union Type Factory... +// + +UnionType *UnionType::get(const Type* const* Types, unsigned NumTypes) { + assert(NumTypes > 0 && "union must have at least one member type!"); + UnionValType UTV(Types, NumTypes); + UnionType *UT = 0; + + LLVMContextImpl *pImpl = Types[0]->getContext().pImpl; + + UT = pImpl->UnionTypes.get(UTV); + + if (!UT) { + // Value not found. Derive a new type! + UT = (UnionType*) operator new(sizeof(UnionType) + + sizeof(PATypeHandle) * NumTypes); + new (UT) UnionType(Types[0]->getContext(), Types, NumTypes); + pImpl->UnionTypes.add(UTV, UT); + } +#ifdef DEBUG_MERGE_TYPES + DEBUG(dbgs() << "Derived new type: " << *UT << "\n"); +#endif + return UT; +} + +UnionType *UnionType::get(const Type *type, ...) { + va_list ap; + SmallVector<const llvm::Type*, 8> UnionFields; + va_start(ap, type); + while (type) { + UnionFields.push_back(type); + type = va_arg(ap, llvm::Type*); + } + unsigned NumTypes = UnionFields.size(); + assert(NumTypes > 0 && "union must have at least one member type!"); + return llvm::UnionType::get(&UnionFields[0], NumTypes); +} + +bool UnionType::isValidElementType(const Type *ElemTy) { + return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); +} + +int UnionType::getElementTypeIndex(const Type *ElemTy) const { + int index = 0; + for (UnionType::element_iterator I = element_begin(), E = element_end(); + I != E; ++I, ++index) { + if (ElemTy == *I) return index; + } + + return -1; +} + +//===----------------------------------------------------------------------===// // Pointer Type Factory... // @@ -1192,6 +1276,21 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) { // concrete - this could potentially change us from an abstract type to a // concrete type. // +void UnionType::refineAbstractType(const DerivedType *OldType, + const Type *NewType) { + LLVMContextImpl *pImpl = OldType->getContext().pImpl; + pImpl->UnionTypes.RefineAbstractType(this, OldType, NewType); +} + +void UnionType::typeBecameConcrete(const DerivedType *AbsTy) { + LLVMContextImpl *pImpl = AbsTy->getContext().pImpl; + pImpl->UnionTypes.TypeBecameConcrete(this, AbsTy); +} + +// refineAbstractType - Called when a contained type is found to be more +// concrete - this could potentially change us from an abstract type to a +// concrete type. 
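(Aside, not part of the patch: the UnionType factory added above deliberately mirrors the StructType one, down to uniquing through the per-context type map. Below is a minimal sketch of how the new API is meant to be driven, assuming only the get/indexValid/getElementTypeIndex signatures visible in the hunks above; the member types chosen and the use of getGlobalContext() are illustrative, not from the patch.)

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace llvm;

int main() {
  LLVMContext &Ctx = getGlobalContext();

  // Build 'union { i32, float, double }' from an array of member types.
  const Type *Members[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx),
                            Type::getDoubleTy(Ctx) };
  UnionType *UT = UnionType::get(Members, 3);

  // Union types are uniqued through LLVMContextImpl::UnionTypes, so an
  // identical member list hands back the same object.
  assert(UT == UnionType::get(Members, 3) && "union types should be uniqued");

  // Index queries mirror StructType: indexValid() bounds-checks the slot,
  // and getElementTypeIndex() maps a member type back to its slot (or -1).
  assert(UT->indexValid(2u));
  assert(UT->getElementTypeIndex(Type::getDoubleTy(Ctx)) == 2);
  assert(UT->getElementTypeIndex(Type::getInt64Ty(Ctx)) == -1);

  errs() << "union members: " << UT->getNumElements() << "\n";
  return 0;
}

Like StructType, the variadic overload UnionType::get(const Type *type, ...) expects a null-terminated argument list, and isValidElementType rejects void, label, metadata and function types.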
+// void PointerType::refineAbstractType(const DerivedType *OldType, const Type *NewType) { LLVMContextImpl *pImpl = OldType->getContext().pImpl; @@ -1204,7 +1303,7 @@ void PointerType::typeBecameConcrete(const DerivedType *AbsTy) { } bool SequentialType::indexValid(const Value *V) const { - if (isa<IntegerType>(V->getType())) + if (V->getType()->isIntegerTy()) return true; return false; } diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h index 4842845..02ab113 100644 --- a/lib/VMCore/TypesContext.h +++ b/lib/VMCore/TypesContext.h @@ -180,6 +180,32 @@ public: } }; +// UnionValType - Define a class to hold the key that goes into the TypeMap +// +class UnionValType { + std::vector<const Type*> ElTypes; +public: + UnionValType(const Type* const* Types, unsigned NumTypes) + : ElTypes(&Types[0], &Types[NumTypes]) {} + + static UnionValType get(const UnionType *UT) { + std::vector<const Type *> ElTypes; + ElTypes.reserve(UT->getNumElements()); + for (unsigned i = 0, e = UT->getNumElements(); i != e; ++i) + ElTypes.push_back(UT->getElementType(i)); + + return UnionValType(&ElTypes[0], ElTypes.size()); + } + + static unsigned hashTypeStructure(const UnionType *UT) { + return UT->getNumElements(); + } + + inline bool operator<(const UnionValType &UTV) const { + return (ElTypes < UTV.ElTypes); + } +}; + // FunctionValType - Define a class to hold the key that goes into the TypeMap // class FunctionValType { diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp index 3759b8a..a36d262 100644 --- a/lib/VMCore/Value.cpp +++ b/lib/VMCore/Value.cpp @@ -45,11 +45,11 @@ Value::Value(const Type *ty, unsigned scid) UseList(0), Name(0) { if (isa<CallInst>(this) || isa<InvokeInst>(this)) assert((VTy->isFirstClassType() || VTy->isVoidTy() || - isa<OpaqueType>(ty) || VTy->getTypeID() == Type::StructTyID) && + ty->isOpaqueTy() || VTy->isStructTy()) && "invalid CallInst type!"); else if (!isa<Constant>(this) && !isa<BasicBlock>(this)) assert((VTy->isFirstClassType() || VTy->isVoidTy() || - isa<OpaqueType>(ty)) && + ty->isOpaqueTy()) && "Cannot create non-first-class values except for constants!"); } @@ -320,7 +320,7 @@ void Value::replaceAllUsesWith(Value *New) { } Value *Value::stripPointerCasts() { - if (!isa<PointerType>(getType())) + if (!getType()->isPointerTy()) return this; Value *V = this; do { @@ -337,12 +337,12 @@ Value *Value::stripPointerCasts() { } else { return V; } - assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (1); } Value *Value::getUnderlyingObject(unsigned MaxLookup) { - if (!isa<PointerType>(getType())) + if (!getType()->isPointerTy()) return this; Value *V = this; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { @@ -357,7 +357,7 @@ Value *Value::getUnderlyingObject(unsigned MaxLookup) { } else { return V; } - assert(isa<PointerType>(V->getType()) && "Unexpected operand type!"); + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } return V; } diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp index 7f9a6cd..a092cd1 100644 --- a/lib/VMCore/ValueTypes.cpp +++ b/lib/VMCore/ValueTypes.cpp @@ -36,17 +36,17 @@ EVT EVT::getExtendedVectorVT(LLVMContext &Context, EVT VT, bool EVT::isExtendedFloatingPoint() const { assert(isExtended() && "Type is not extended!"); - return LLVMTy->isFPOrFPVector(); + return LLVMTy->isFPOrFPVectorTy(); } bool EVT::isExtendedInteger() const { assert(isExtended() && "Type is not extended!"); - 
return LLVMTy->isIntOrIntVector(); + return LLVMTy->isIntOrIntVectorTy(); } bool EVT::isExtendedVector() const { assert(isExtended() && "Type is not extended!"); - return isa<VectorType>(LLVMTy); + return LLVMTy->isVectorTy(); } bool EVT::isExtended64BitVector() const { @@ -126,6 +126,7 @@ std::string EVT::getEVTString() const { case MVT::v8f32: return "v8f32"; case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; + case MVT::Metadata:return "Metadata"; } } diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index d0e8d30..721e96a 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -161,7 +161,8 @@ namespace { VerifierFailureAction action; // What to do if verification fails. Module *Mod; // Module we are verifying right now - DominatorTree *DT; // Dominator Tree, caution can be null! + LLVMContext *Context; // Context within which we are verifying + DominatorTree *DT; // Dominator Tree, caution can be null! std::string Messages; raw_string_ostream MessagesStr; @@ -178,24 +179,25 @@ namespace { Verifier() : FunctionPass(&ID), Broken(false), RealPass(true), action(AbortProcessAction), - DT(0), MessagesStr(Messages) {} + Mod(0), Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(VerifierFailureAction ctn) : FunctionPass(&ID), - Broken(false), RealPass(true), action(ctn), DT(0), + Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(bool AB) : FunctionPass(&ID), Broken(false), RealPass(true), - action( AB ? AbortProcessAction : PrintMessageAction), DT(0), - MessagesStr(Messages) {} + action( AB ? AbortProcessAction : PrintMessageAction), Mod(0), + Context(0), DT(0), MessagesStr(Messages) {} explicit Verifier(DominatorTree &dt) : FunctionPass(&ID), - Broken(false), RealPass(false), action(PrintMessageAction), - DT(&dt), MessagesStr(Messages) {} + Broken(false), RealPass(false), action(PrintMessageAction), Mod(0), + Context(0), DT(&dt), MessagesStr(Messages) {} bool doInitialization(Module &M) { Mod = &M; + Context = &M.getContext(); verifyTypeSymbolTable(M.getTypeSymbolTable()); // If this is a real pass, in a pass manager, we must abort before @@ -211,6 +213,7 @@ namespace { if (RealPass) DT = &getAnalysis<DominatorTree>(); Mod = F.getParent(); + if (!Context) Context = &F.getContext(); visit(F); InstsInThisBlock.clear(); @@ -314,6 +317,7 @@ namespace { void visitStoreInst(StoreInst &SI); void visitInstruction(Instruction &I); void visitTerminatorInst(TerminatorInst &I); + void visitBranchInst(BranchInst &BI); void visitReturnInst(ReturnInst &RI); void visitSwitchInst(SwitchInst &SI); void visitSelectInst(SelectInst &SI); @@ -429,7 +433,7 @@ void Verifier::visitGlobalValue(GlobalValue &GV) { if (GV.hasAppendingLinkage()) { GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV); - Assert1(GVar && isa<ArrayType>(GVar->getType()->getElementType()), + Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(), "Only global arrays can have appending linkage!", GVar); } } @@ -596,13 +600,16 @@ void Verifier::visitFunction(Function &F) { const FunctionType *FT = F.getFunctionType(); unsigned NumArgs = F.arg_size(); + Assert1(Context == &F.getContext(), + "Function context does not match Module context!", &F); + Assert1(!F.hasCommonLinkage(), "Functions may not have common linkage", &F); Assert2(FT->getNumParams() == NumArgs, "# formal arguments must match # of arguments for function type!", &F, FT); Assert1(F.getReturnType()->isFirstClassType() || F.getReturnType()->isVoidTy() || - 
isa<StructType>(F.getReturnType()), + F.getReturnType()->isStructTy(), "Functions cannot return aggregate values!", &F); Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(), @@ -743,6 +750,14 @@ void Verifier::visitTerminatorInst(TerminatorInst &I) { visitInstruction(I); } +void Verifier::visitBranchInst(BranchInst &BI) { + if (BI.isConditional()) { + Assert2(BI.getCondition()->getType()->isIntegerTy(1), + "Branch condition is not 'i1' type!", &BI, BI.getCondition()); + } + visitTerminatorInst(BI); +} + void Verifier::visitReturnInst(ReturnInst &RI) { Function *F = RI.getParent()->getParent(); unsigned N = RI.getNumOperands(); @@ -821,9 +836,9 @@ void Verifier::visitTruncInst(TruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "trunc source and destination must both be a vector or neither", &I); Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I); @@ -836,9 +851,9 @@ void Verifier::visitZExtInst(ZExtInst &I) { const Type *DestTy = I.getType(); // Get the size of the types in bits, we'll need this later - Assert1(SrcTy->isIntOrIntVector(), "ZExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "zext source and destination must both be a vector or neither", &I); unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); @@ -857,9 +872,9 @@ void Verifier::visitSExtInst(SExtInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I); - Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I); + Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "sext source and destination must both be a vector or neither", &I); Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I); @@ -874,9 +889,9 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I); - Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I); + Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "fptrunc source and destination must both be a vector or neither",&I); Assert1(SrcBitSize > DestBitSize,"DestTy too big 
for FPTrunc", &I); @@ -892,9 +907,9 @@ void Verifier::visitFPExtInst(FPExtInst &I) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); - Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I); - Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I); - Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy), + Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I); + Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I); + Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(), "fpext source and destination must both be a vector or neither", &I); Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I); @@ -906,14 +921,14 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - bool SrcVec = isa<VectorType>(SrcTy); - bool DstVec = isa<VectorType>(DestTy); + bool SrcVec = SrcTy->isVectorTy(); + bool DstVec = DestTy->isVectorTy(); Assert1(SrcVec == DstVec, "UIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVector(), + Assert1(SrcTy->isIntOrIntVectorTy(), "UIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVector(), + Assert1(DestTy->isFPOrFPVectorTy(), "UIToFP result must be FP or FP vector", &I); if (SrcVec && DstVec) @@ -929,14 +944,14 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - bool SrcVec = isa<VectorType>(SrcTy); - bool DstVec = isa<VectorType>(DestTy); + bool SrcVec = SrcTy->isVectorTy(); + bool DstVec = DestTy->isVectorTy(); Assert1(SrcVec == DstVec, "SIToFP source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isIntOrIntVector(), + Assert1(SrcTy->isIntOrIntVectorTy(), "SIToFP source must be integer or integer vector", &I); - Assert1(DestTy->isFPOrFPVector(), + Assert1(DestTy->isFPOrFPVectorTy(), "SIToFP result must be FP or FP vector", &I); if (SrcVec && DstVec) @@ -952,13 +967,14 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - bool SrcVec = isa<VectorType>(SrcTy); - bool DstVec = isa<VectorType>(DestTy); + bool SrcVec = SrcTy->isVectorTy(); + bool DstVec = DestTy->isVectorTy(); Assert1(SrcVec == DstVec, "FPToUI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVector(), "FPToUI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVector(), + Assert1(SrcTy->isFPOrFPVectorTy(), "FPToUI source must be FP or FP vector", + &I); + Assert1(DestTy->isIntOrIntVectorTy(), "FPToUI result must be integer or integer vector", &I); if (SrcVec && DstVec) @@ -974,14 +990,14 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - bool SrcVec = isa<VectorType>(SrcTy); - bool DstVec = isa<VectorType>(DestTy); + bool SrcVec = SrcTy->isVectorTy(); + bool DstVec = DestTy->isVectorTy(); Assert1(SrcVec == DstVec, "FPToSI source and dest must both be vector or scalar", &I); - Assert1(SrcTy->isFPOrFPVector(), + Assert1(SrcTy->isFPOrFPVectorTy(), "FPToSI source must be FP or FP vector", &I); - Assert1(DestTy->isIntOrIntVector(), + Assert1(DestTy->isIntOrIntVectorTy(), "FPToSI result must be integer or integer vector", &I); if (SrcVec && DstVec) @@ -997,8 +1013,8 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { const Type *SrcTy = 
I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - Assert1(isa<PointerType>(SrcTy), "PtrToInt source must be pointer", &I); - Assert1(DestTy->isInteger(), "PtrToInt result must be integral", &I); + Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I); + Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I); visitInstruction(I); } @@ -1008,8 +1024,8 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { const Type *SrcTy = I.getOperand(0)->getType(); const Type *DestTy = I.getType(); - Assert1(SrcTy->isInteger(), "IntToPtr source must be an integral", &I); - Assert1(isa<PointerType>(DestTy), "IntToPtr result must be a pointer",&I); + Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I); + Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I); visitInstruction(I); } @@ -1025,7 +1041,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) { // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. - Assert1(isa<PointerType>(DestTy) == isa<PointerType>(DestTy), + Assert1(DestTy->isPointerTy() == DestTy->isPointerTy(), "Bitcast requires both operands to be pointer or neither", &I); Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I); @@ -1068,11 +1084,11 @@ void Verifier::visitPHINode(PHINode &PN) { void Verifier::VerifyCallSite(CallSite CS) { Instruction *I = CS.getInstruction(); - Assert1(isa<PointerType>(CS.getCalledValue()->getType()), + Assert1(CS.getCalledValue()->getType()->isPointerTy(), "Called function must be a pointer!", I); const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType()); - Assert1(isa<FunctionType>(FPTy->getElementType()), + Assert1(FPTy->getElementType()->isFunctionTy(), "Called function is not pointer to function type!", I); const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType()); @@ -1151,7 +1167,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::UDiv: case Instruction::SRem: case Instruction::URem: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Integer arithmetic operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Integer arithmetic operators must have same type " @@ -1164,7 +1180,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: - Assert1(B.getType()->isFPOrFPVector(), + Assert1(B.getType()->isFPOrFPVectorTy(), "Floating-point arithmetic operators only work with " "floating-point types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), @@ -1175,7 +1191,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::And: case Instruction::Or: case Instruction::Xor: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Logical operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Logical operators must have same type for operands and result!", @@ -1184,7 +1200,7 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - Assert1(B.getType()->isIntOrIntVector(), + Assert1(B.getType()->isIntOrIntVectorTy(), "Shifts only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Shift return type must be same as operands!", &B); @@ -1203,7 +1219,7 @@ void 
Verifier::visitICmpInst(ICmpInst& IC) { Assert1(Op0Ty == Op1Ty, "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert1(Op0Ty->isIntOrIntVector() || isa<PointerType>(Op0Ty), + Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(), "Invalid operand types for ICmp instruction", &IC); visitInstruction(IC); @@ -1216,7 +1232,7 @@ void Verifier::visitFCmpInst(FCmpInst& FC) { Assert1(Op0Ty == Op1Ty, "Both operands to FCmp instruction are not of the same type!", &FC); // Check that the operands are the right type - Assert1(Op0Ty->isFPOrFPVector(), + Assert1(Op0Ty->isFPOrFPVectorTy(), "Invalid operand types for FCmp instruction", &FC); visitInstruction(FC); } @@ -1270,7 +1286,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs.begin(), Idxs.end()); Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP); - Assert2(isa<PointerType>(GEP.getType()) && + Assert2(GEP.getType()->isPointerTy() && cast<PointerType>(GEP.getType())->getElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); visitInstruction(GEP); @@ -1302,7 +1318,7 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { &AI); Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type", &AI); - Assert1(AI.getArraySize()->getType()->isInteger(32), + Assert1(AI.getArraySize()->getType()->isIntegerTy(32), "Alloca array size must be i32", &AI); visitInstruction(AI); } @@ -1481,7 +1497,7 @@ void Verifier::visitInstruction(Instruction &I) { void Verifier::VerifyType(const Type *Ty) { if (!Types.insert(Ty)) return; - Assert1(&Mod->getContext() == &Ty->getContext(), + Assert1(Context == &Ty->getContext(), "Type context does not match Module context!", Ty); switch (Ty->getTypeID()) { @@ -1509,6 +1525,15 @@ void Verifier::VerifyType(const Type *Ty) { VerifyType(ElTy); } } break; + case Type::UnionTyID: { + const UnionType *UTy = cast<UnionType>(Ty); + for (unsigned i = 0, e = UTy->getNumElements(); i != e; ++i) { + const Type *ElTy = UTy->getElementType(i); + Assert2(UnionType::isValidElementType(ElTy), + "Union type with invalid element type", ElTy, UTy); + VerifyType(ElTy); + } + } break; case Type::ArrayTyID: { const ArrayType *ATy = cast<ArrayType>(Ty); Assert1(ArrayType::isValidElementType(ATy->getElementType()), @@ -1616,7 +1641,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { if (ID == Intrinsic::gcroot) { AllocaInst *AI = dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts()); - Assert1(AI && isa<PointerType>(AI->getType()->getElementType()), + Assert1(AI && AI->getType()->getElementType()->isPointerTy(), "llvm.gcroot parameter #1 must be a pointer alloca.", &CI); Assert1(isa<Constant>(CI.getOperand(2)), "llvm.gcroot parameter #2 must be a constant.", &CI); @@ -1734,7 +1759,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, } } } else if (VT == MVT::iAny) { - if (!EltTy->isInteger()) { + if (!EltTy->isIntegerTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " "an integer type.", F); return false; @@ -1759,7 +1784,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, break; } } else if (VT == MVT::fAny) { - if (!EltTy->isFloatingPoint()) { + if (!EltTy->isFloatingPointTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not " "a floating-point type.", F); return false; @@ -1778,7 +1803,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID 
ID, Function *F, const Type *Ty, } Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString(); } else if (VT == MVT::iPTR) { - if (!isa<PointerType>(Ty)) { + if (!Ty->isPointerTy()) { CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a " "pointer and a pointer is required.", F); return false;
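(Aside, not part of the patch: most of the mechanical churn across Type.cpp, Value.cpp, ValueTypes.cpp and Verifier.cpp above is the switch from isa<FooType>(Ty) and the old isInteger()/isFloatingPoint()-style predicates to the Type::is*Ty() query methods. The helpers below are invented purely to show the old and new spellings side by side; the predicates themselves are exactly the ones the hunks switch to.)

#include "llvm/DerivedTypes.h"

using namespace llvm;

// Accepts the operand types Verifier::visitICmpInst allows after this patch.
static bool isICmpOperandTy(const Type *Ty) {
  // Before: Ty->isIntOrIntVector() || isa<PointerType>(Ty)
  return Ty->isIntOrIntVectorTy() || Ty->isPointerTy();
}

// Mirrors VectorType::isValidElementType as rewritten above.
static bool isVectorElementTy(const Type *Ty) {
  // Before: Ty->isInteger() || Ty->isFloatingPoint() || isa<OpaqueType>(Ty)
  return Ty->isIntegerTy() || Ty->isFloatingPointTy() || Ty->isOpaqueTy();
}

// Structure and union indices remain 32-bit integer constants; the
// width-checking overload is renamed as well.
static bool isAggregateIndexTy(const Type *Ty) {
  // Before: Ty->isInteger(32)
  return Ty->isIntegerTy(32);
}

The renames themselves carry no behavior change; the substantive additions in these files are the UnionType support and the new Verifier::visitBranchInst check that a conditional branch condition has type i1.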