Update to LLVM 3.5a.

Change-Id: Ifadecab779f128e62e430c2b4f6ddd84953ed617
author: Stephen Hines <srhines@google.com> 2014-04-23 16:57:46 -0700
committer: Stephen Hines <srhines@google.com> 2014-04-24 15:53:16 -0700
commit: 36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree: e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/Analysis
parent: 69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
download: external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz
external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2
59 files changed, 1997 insertions, 1297 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index b8b6d37..9583bbe 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -25,12 +25,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
@@ -338,7 +338,7 @@ AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc) {
   // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
-  if (CX->getOrdering() > Monotonic)
+  if (CX->getSuccessOrdering() > Monotonic)
     return ModRef;
 
   // If the cmpxchg address does not alias the location, it does not access it.
@@ -370,9 +370,9 @@ namespace {
     CapturesBefore(const Instruction *I, DominatorTree *DT)
       : BeforeHere(I), DT(DT), Captured(false) {}
 
-    void tooManyUses() { Captured = true; }
+    void tooManyUses() override { Captured = true; }
 
-    bool shouldExplore(Use *U) {
+    bool shouldExplore(const Use *U) override {
       Instruction *I = cast<Instruction>(U->getUser());
       BasicBlock *BB = I->getParent();
       // We explore this usage only if the usage can reach "BeforeHere".
@@ -388,7 +388,7 @@ namespace {
       return true;
     }
 
-    bool captured(Use *U) {
+    bool captured(const Use *U) override {
       Instruction *I = cast<Instruction>(U->getUser());
       BasicBlock *BB = I->getParent();
       // Same logic as in shouldExplore.
@@ -416,9 +416,9 @@ AliasAnalysis::ModRefResult
 AliasAnalysis::callCapturesBefore(const Instruction *I,
                                   const AliasAnalysis::Location &MemLoc,
                                   DominatorTree *DT) {
-  if (!DT || !TD) return AliasAnalysis::ModRef;
+  if (!DT || !DL) return AliasAnalysis::ModRef;
 
-  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD);
+  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, DL);
   if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
       isa<Constant>(Object))
     return AliasAnalysis::ModRef;
@@ -472,7 +472,8 @@ AliasAnalysis::~AliasAnalysis() {}
 /// AliasAnalysis interface before any other methods are called.
 ///
 void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
-  TD = P->getAnalysisIfAvailable<DataLayout>();
+  DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
   TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
   AA = &P->getAnalysis<AliasAnalysis>();
 }
@@ -487,7 +488,7 @@ void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 /// if known, or a conservative value otherwise.
 ///
 uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
-  return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
+  return DL ? DL->getTypeStoreSize(Ty) : UnknownSize;
 }
 
 /// canBasicBlockModify - Return true if it is possible for execution of the
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 9f4a47c..2e3bc55 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -14,7 +14,6 @@
 
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -75,13 +74,13 @@ namespace {
       }
     }
 
-    bool runOnModule(Module &M) {
+    bool runOnModule(Module &M) override {
       this->M = &M;
       InitializeAliasAnalysis(this);
       return false;
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AliasAnalysis::getAnalysisUsage(AU);
       AU.addRequired<AliasAnalysis>();
       AU.setPreservesAll();
@@ -91,25 +90,25 @@ namespace {
     /// an analysis interface through multiple inheritance.  If needed, it
     /// should override this to adjust the this pointer as needed for the
     /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+    void *getAdjustedAnalysisPointer(AnalysisID PI) override {
       if (PI == &AliasAnalysis::ID)
         return (AliasAnalysis*)this;
       return this;
     }
     
     // FIXME: We could count these too...
-    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override {
       return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
     }
 
     // Forwarding functions: just delegate to a real AA implementation, counting
     // the number of responses...
-    AliasResult alias(const Location &LocA, const Location &LocB);
+    AliasResult alias(const Location &LocA, const Location &LocB) override;
 
     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Location &Loc);
+                               const Location &Loc) override;
     ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) {
+                               ImmutableCallSite CS2) override {
       return AliasAnalysis::getModRefInfo(CS1,CS2);
     }
   };
@@ -138,10 +137,10 @@ AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
   if (PrintAll || (PrintAllFailures && R == MayAlias)) {
     errs() << AliasString << ":\t";
     errs() << "[" << LocA.Size << "B] ";
-    WriteAsOperand(errs(), LocA.Ptr, true, M);
+    LocA.Ptr->printAsOperand(errs(), true, M);
     errs() << ", ";
     errs() << "[" << LocB.Size << "B] ";
-    WriteAsOperand(errs(), LocB.Ptr, true, M);
+    LocB.Ptr->printAsOperand(errs(), true, M);
     errs() << "\n";
   }
 
@@ -164,7 +163,7 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
   if (PrintAll || (PrintAllFailures && R == ModRef)) {
     errs() << MRString << ":  Ptr: ";
     errs() << "[" << Loc.Size << "B] ";
-    WriteAsOperand(errs(), Loc.Ptr, true, M);
+    Loc.Ptr->printAsOperand(errs(), true, M);
     errs() << "\t<->" << *CS.getInstruction() << '\n';
   }
   return R;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index a571463..d9fa5a5 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -20,15 +20,14 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -57,12 +56,12 @@ namespace {
       initializeAAEvalPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<AliasAnalysis>();
       AU.setPreservesAll();
     }
 
-    bool doInitialization(Module &M) {
+    bool doInitialization(Module &M) override {
       NoAlias = MayAlias = PartialAlias = MustAlias = 0;
       NoModRef = Mod = Ref = ModRef = 0;
 
@@ -74,8 +73,8 @@ namespace {
       return false;
     }
 
-    bool runOnFunction(Function &F);
-    bool doFinalization(Module &M);
+    bool runOnFunction(Function &F) override;
+    bool doFinalization(Module &M) override;
   };
 }
 
@@ -94,8 +93,8 @@ static void PrintResults(const char *Msg, bool P, const Value *V1,
     std::string o1, o2;
     {
       raw_string_ostream os1(o1), os2(o2);
-      WriteAsOperand(os1, V1, true, M);
-      WriteAsOperand(os2, V2, true, M);
+      V1->printAsOperand(os1, true, M);
+      V2->printAsOperand(os2, true, M);
     }
     
     if (o2 < o1)
@@ -111,7 +110,7 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
                    Module *M) {
   if (P) {
     errs() << "  " << Msg << ":  Ptr: ";
-    WriteAsOperand(errs(), Ptr, true, M);
+    Ptr->printAsOperand(errs(), true, M);
     errs() << "\t<->" << *I << '\n';
   }
 }
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index f6178e3..5d61cf9 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -43,7 +43,7 @@ namespace {
       initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
     }
 
-    bool runOnModule(Module &M) {
+    bool runOnModule(Module &M) override {
       InitializeAliasAnalysis(this);                 // set up super class
 
       for(Module::global_iterator I = M.global_begin(),
@@ -76,7 +76,7 @@ namespace {
       return false;
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AliasAnalysis::getAnalysisUsage(AU);
       AU.setPreservesAll();                         // Does not transform code
     }
@@ -85,7 +85,7 @@ namespace {
     /// an analysis interface through multiple inheritance.  If needed, it
     /// should override this to adjust the this pointer as needed for the
     /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+    void *getAdjustedAnalysisPointer(AnalysisID PI) override {
       if (PI == &AliasAnalysis::ID)
         return (AliasAnalysis*)this;
       return this;
@@ -94,7 +94,7 @@ namespace {
     //------------------------------------------------
     // Implement the AliasAnalysis API
     //
-    AliasResult alias(const Location &LocA, const Location &LocB) {
+    AliasResult alias(const Location &LocA, const Location &LocB) override {
       assert(Vals.find(LocA.Ptr) != Vals.end() &&
              "Never seen value in AA before");
       assert(Vals.find(LocB.Ptr) != Vals.end() &&
@@ -103,26 +103,26 @@ namespace {
     }
 
     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Location &Loc) {
+                               const Location &Loc) override {
       assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
       return AliasAnalysis::getModRefInfo(CS, Loc);
     }
 
     ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) {
+                               ImmutableCallSite CS2) override {
       return AliasAnalysis::getModRefInfo(CS1,CS2);
     }
-    
-    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override {
       assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
       return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
     }
 
-    virtual void deleteValue(Value *V) {
+    void deleteValue(Value *V) override {
       assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
       AliasAnalysis::deleteValue(V);
     }
-    virtual void copyValue(Value *From, Value *To) {
+    void copyValue(Value *From, Value *To) override {
       Vals.insert(To);
       AliasAnalysis::copyValue(From, To);
     }
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 2289c12..ab1005e 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -13,8 +13,8 @@
 
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
@@ -22,7 +22,6 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -566,7 +565,7 @@ void AliasSet::print(raw_ostream &OS) const {
     OS << "Pointers: ";
     for (iterator I = begin(), E = end(); I != E; ++I) {
       if (I != begin()) OS << ", ";
-      WriteAsOperand(OS << "(", I.getPointer());
+      I.getPointer()->printAsOperand(OS << "(");
       OS << ", " << I.getSize() << ")";
     }
   }
@@ -574,7 +573,7 @@ void AliasSet::print(raw_ostream &OS) const {
     OS << "\n    " << UnknownInsts.size() << " Unknown instructions: ";
     for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {
       if (i) OS << ", ";
-      WriteAsOperand(OS, UnknownInsts[i]);
+      UnknownInsts[i]->printAsOperand(OS);
     }
   }
   OS << "\n";
@@ -628,12 +627,12 @@ namespace {
       initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
       AU.addRequired<AliasAnalysis>();
     }
 
-    virtual bool runOnFunction(Function &F) {
+    bool runOnFunction(Function &F) override {
       Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
 
       for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 98f2a55..c960123 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -9,10 +9,11 @@
 
 #include "llvm-c/Analysis.h"
 #include "llvm-c/Initialization.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/InitializePasses.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/PassRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstring>
 
 using namespace llvm;
@@ -72,21 +73,33 @@ void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
 
 LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
                           char **OutMessages) {
+  raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : 0;
   std::string Messages;
+  raw_string_ostream MsgsOS(Messages);
+
+  LLVMBool Result = verifyModule(*unwrap(M), OutMessages ? &MsgsOS : DebugOS);
+
+  // Duplicate the output to stderr.
+  if (DebugOS && OutMessages)
+    *DebugOS << MsgsOS.str();
 
-  LLVMBool Result = verifyModule(*unwrap(M),
-                            static_cast<VerifierFailureAction>(Action),
-                            OutMessages? &Messages : 0);
+  if (Action == LLVMAbortProcessAction && Result)
+    report_fatal_error("Broken module found, compilation aborted!");
 
   if (OutMessages)
-    *OutMessages = strdup(Messages.c_str());
+    *OutMessages = strdup(MsgsOS.str().c_str());
 
   return Result;
 }
 
 LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
-  return verifyFunction(*unwrap<Function>(Fn),
-                        static_cast<VerifierFailureAction>(Action));
+  LLVMBool Result = verifyFunction(
+      *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs() : 0);
+
+  if (Action == LLVMAbortProcessAction && Result)
+    report_fatal_error("Broken function found, compilation aborted!");
+
+  return Result;
 }
 
 void LLVMViewFunctionCFG(LLVMValueRef Fn) {
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index 86d71f0..76eee74 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -67,6 +67,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
 
 # For the device
 # =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
 include $(CLEAR_VARS)
 
 LOCAL_MODULE:= libLLVMAnalysis
@@ -76,3 +77,4 @@ LOCAL_SRC_FILES := $(analysis_SRC_FILES)
 include $(LLVM_DEVICE_BUILD_MK)
 include $(LLVM_GEN_INTRINSICS_MK)
 include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index b2c2011..e267374 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -17,14 +17,18 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
@@ -33,11 +37,21 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include <algorithm>
 using namespace llvm;
 
+/// Cutoff after which to stop analysing a set of phi nodes potentially involved
+/// in a cycle. Because we are analysing 'through' phi nodes we need to be
+/// careful with value equivalence. We use reachability to make sure a value
+/// cannot be involved in a cycle.
+const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
+
+// The max limit of the search depth in DecomposeGEPExpression() and
+// GetUnderlyingObject(), both functions need to use the same search
+// depth otherwise the algorithm in aliasGEP will assert.
+static const unsigned MaxLookupSearchDepth = 6;
+
 //===----------------------------------------------------------------------===//
 // Useful predicates
 //===----------------------------------------------------------------------===//
@@ -84,11 +98,11 @@ static bool isEscapeSource(const Value *V) {
 
 /// getObjectSize - Return the size of the object specified by V, or
 /// UnknownSize if unknown.
-static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
+static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
   uint64_t Size;
-  if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign))
+  if (getObjectSize(V, Size, &DL, &TLI, RoundToAlign))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -96,7 +110,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD,
 /// isObjectSmallerThan - Return true if we can prove that the object specified
 /// by V is smaller than Size.
 static bool isObjectSmallerThan(const Value *V, uint64_t Size,
-                                const DataLayout &TD,
+                                const DataLayout &DL,
                                 const TargetLibraryInfo &TLI) {
   // Note that the meanings of the "object" are slightly different in the
   // following contexts:
@@ -129,7 +143,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
 
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
-  uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
+  uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true);
 
   return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
 }
@@ -137,15 +151,15 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
 /// isObjectSize - Return true if we can prove that the object specified
 /// by V has size Size.
 static bool isObjectSize(const Value *V, uint64_t Size,
-                         const DataLayout &TD, const TargetLibraryInfo &TLI) {
-  uint64_t ObjectSize = getObjectSize(V, TD, TLI);
+                         const DataLayout &DL, const TargetLibraryInfo &TLI) {
+  uint64_t ObjectSize = getObjectSize(V, DL, TLI);
   return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
 }
 
 /// isIdentifiedFunctionLocal - Return true if V is umabigously identified
 /// at the function-level. Different IdentifiedFunctionLocals can't alias.
 /// Further, an IdentifiedFunctionLocal can not alias with any function
-/// arguments other than itself, which is not neccessarily true for
+/// arguments other than itself, which is not necessarily true for
 /// IdentifiedObjects.
 static bool isIdentifiedFunctionLocal(const Value *V)
 {
@@ -191,7 +205,7 @@ namespace {
 /// represented in the result.
 static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
                                   ExtensionKind &Extension,
-                                  const DataLayout &TD, unsigned Depth) {
+                                  const DataLayout &DL, unsigned Depth) {
   assert(V->getType()->isIntegerTy() && "Not an integer value");
 
   // Limit our recursion depth.
@@ -208,23 +222,23 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
       case Instruction::Or:
         // X|C == X+C if all the bits in C are unset in X.  Otherwise we can't
         // analyze it.
-        if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD))
+        if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &DL))
           break;
         // FALL THROUGH.
       case Instruction::Add:
         V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                                TD, Depth+1);
+                                DL, Depth+1);
         Offset += RHSC->getValue();
         return V;
       case Instruction::Mul:
         V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                                TD, Depth+1);
+                                DL, Depth+1);
         Offset *= RHSC->getValue();
         Scale *= RHSC->getValue();
         return V;
       case Instruction::Shl:
         V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
-                                TD, Depth+1);
+                                DL, Depth+1);
         Offset <<= RHSC->getValue().getLimitedValue();
         Scale <<= RHSC->getValue().getLimitedValue();
         return V;
@@ -245,7 +259,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
     Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
 
     Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
-                                        TD, Depth+1);
+                                        DL, Depth+1);
     Scale = Scale.zext(OldWidth);
     Offset = Offset.zext(OldWidth);
 
@@ -267,15 +281,18 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
 /// the gep cannot necessarily be reconstructed from its decomposed form.
 ///
 /// When DataLayout is around, this function is capable of analyzing everything
-/// that GetUnderlyingObject can look through.  When not, it just looks
-/// through pointer casts.
+/// that GetUnderlyingObject can look through. To be able to do that
+/// GetUnderlyingObject and DecomposeGEPExpression must use the same search
+/// depth (MaxLookupSearchDepth).
+/// When DataLayout not is around, it just looks through pointer casts.
 ///
 static const Value *
 DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
                        SmallVectorImpl<VariableGEPIndex> &VarIndices,
-                       const DataLayout *TD) {
+                       bool &MaxLookupReached, const DataLayout *DL) {
   // Limit recursion depth to limit compile time in crazy cases.
-  unsigned MaxLookup = 6;
+  unsigned MaxLookup = MaxLookupSearchDepth;
+  MaxLookupReached = false;
 
   BaseOffs = 0;
   do {
@@ -304,7 +321,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       if (const Instruction *I = dyn_cast<Instruction>(V))
         // TODO: Get a DominatorTree and use it here.
         if (const Value *Simplified =
-              SimplifyInstruction(const_cast<Instruction *>(I), TD)) {
+              SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
           V = Simplified;
           continue;
         }
@@ -319,7 +336,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
     // If we are lacking DataLayout information, we can't compute the offets of
     // elements computed by GEPs.  However, we can handle bitcast equivalent
     // GEPs.
-    if (TD == 0) {
+    if (DL == 0) {
       if (!GEPOp->hasAllZeroIndices())
         return V;
       V = GEPOp->getOperand(0);
@@ -338,30 +355,30 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
         unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
         if (FieldNo == 0) continue;
 
-        BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+        BaseOffs += DL->getStructLayout(STy)->getElementOffset(FieldNo);
         continue;
       }
 
       // For an array/pointer, add the element offset, explicitly scaled.
       if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero()) continue;
-        BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+        BaseOffs += DL->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
         continue;
       }
 
-      uint64_t Scale = TD->getTypeAllocSize(*GTI);
+      uint64_t Scale = DL->getTypeAllocSize(*GTI);
       ExtensionKind Extension = EK_NotExtended;
 
       // If the integer type is smaller than the pointer size, it is implicitly
       // sign extended to pointer size.
       unsigned Width = Index->getType()->getIntegerBitWidth();
-      if (TD->getPointerSizeInBits(AS) > Width)
+      if (DL->getPointerSizeInBits(AS) > Width)
         Extension = EK_SignExt;
 
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
       Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
-                                  *TD, 0);
+                                  *DL, 0);
 
       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
@@ -383,7 +400,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
 
       // Make sure that we have a scale that makes sense for this target's
       // pointer size.
-      if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) {
+      if (unsigned ShiftBits = 64 - DL->getPointerSizeInBits(AS)) {
         Scale <<= ShiftBits;
         Scale = (int64_t)Scale >> ShiftBits;
       }
@@ -400,45 +417,10 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
   } while (--MaxLookup);
 
   // If the chain of expressions is too deep, just return early.
+  MaxLookupReached = true;
   return V;
 }
 
-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
-                               const SmallVectorImpl<VariableGEPIndex> &Src) {
-  if (Src.empty()) return;
-
-  for (unsigned i = 0, e = Src.size(); i != e; ++i) {
-    const Value *V = Src[i].V;
-    ExtensionKind Extension = Src[i].Extension;
-    int64_t Scale = Src[i].Scale;
-
-    // Find V in Dest.  This is N^2, but pointer indices almost never have more
-    // than a few variable indexes.
-    for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
-      if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
-      // If we found it, subtract off Scale V's from the entry in Dest.  If it
-      // goes to zero, remove the entry.
-      if (Dest[j].Scale != Scale)
-        Dest[j].Scale -= Scale;
-      else
-        Dest.erase(Dest.begin()+j);
-      Scale = 0;
-      break;
-    }
-
-    // If we didn't consume this entry, add it to the end of the Dest list.
-    if (Scale) {
-      VariableGEPIndex Entry = { V, Extension, -Scale };
-      Dest.push_back(Entry);
-    }
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // BasicAliasAnalysis Pass
 //===----------------------------------------------------------------------===//
@@ -471,17 +453,16 @@ namespace {
       initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void initializePass() {
+    void initializePass() override {
       InitializeAliasAnalysis(this);
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<TargetLibraryInfo>();
     }
 
-    virtual AliasResult alias(const Location &LocA,
-                              const Location &LocB) {
+    AliasResult alias(const Location &LocA, const Location &LocB) override {
       assert(AliasCache.empty() && "AliasCache must be cleared after use!");
       assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
              "BasicAliasAnalysis doesn't support interprocedural queries.");
@@ -492,35 +473,36 @@ namespace {
       // SmallDenseMap if it ever grows larger.
       // FIXME: This should really be shrink_to_inline_capacity_and_clear().
       AliasCache.shrink_and_clear();
+      VisitedPhiBBs.clear();
       return Alias;
     }
 
-    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
-                                       const Location &Loc);
+    ModRefResult getModRefInfo(ImmutableCallSite CS,
+                               const Location &Loc) override;
 
-    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                                       ImmutableCallSite CS2) {
+    ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                               ImmutableCallSite CS2) override {
       // The AliasAnalysis base class has some smarts, lets use them.
       return AliasAnalysis::getModRefInfo(CS1, CS2);
     }
 
     /// pointsToConstantMemory - Chase pointers until we find a (constant
     /// global) or not.
-    virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
 
     /// getModRefBehavior - Return the behavior when calling the given
     /// call site.
-    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
 
     /// getModRefBehavior - Return the behavior when calling the given function.
     /// For use when the call site is not known.
-    virtual ModRefBehavior getModRefBehavior(const Function *F);
+    ModRefBehavior getModRefBehavior(const Function *F) override;
 
     /// getAdjustedAnalysisPointer - This method is used when a pass implements
     /// an analysis interface through multiple inheritance.  If needed, it
     /// should override this to adjust the this pointer as needed for the
     /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(const void *ID) {
+    void *getAdjustedAnalysisPointer(const void *ID) override {
       if (ID == &AliasAnalysis::ID)
         return (AliasAnalysis*)this;
       return this;
@@ -532,9 +514,39 @@ namespace {
     typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
     AliasCacheTy AliasCache;
 
+    /// \brief Track phi nodes we have visited. When interpret "Value" pointer
+    /// equality as value equality we need to make sure that the "Value" is not
+    /// part of a cycle. Otherwise, two uses could come from different
+    /// "iterations" of a cycle and see different values for the same "Value"
+    /// pointer.
+    /// The following example shows the problem:
+    ///   %p = phi(%alloca1, %addr2)
+    ///   %l = load %ptr
+    ///   %addr1 = gep, %alloca2, 0, %l
+    ///   %addr2 = gep  %alloca2, 0, (%l + 1)
+    ///      alias(%p, %addr1) -> MayAlias !
+    ///   store %l, ...
+    SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs;
+
     // Visited - Track instructions visited by pointsToConstantMemory.
     SmallPtrSet<const Value*, 16> Visited;
 
+    /// \brief Check whether two Values can be considered equivalent.
+    ///
+    /// In addition to pointer equivalence of \p V1 and \p V2 this checks
+    /// whether they can not be part of a cycle in the value graph by looking at
+    /// all visited phi nodes an making sure that the phis cannot reach the
+    /// value. We have to do this because we are looking through phi nodes (That
+    /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).
+    bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
+
+    /// \brief Dest and Src are the variable indices from two decomposed
+    /// GetElementPtr instructions GEP1 and GEP2 which have common base
+    /// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
+    /// difference between the two pointers.
+    void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+                            const SmallVectorImpl<VariableGEPIndex> &Src);
+
     // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
     // instruction against another.
     AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
@@ -589,7 +601,7 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
   SmallVector<const Value *, 16> Worklist;
   Worklist.push_back(Loc.Ptr);
   do {
-    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD);
+    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
     if (!Visited.insert(V)) {
       Visited.clear();
       return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -694,7 +706,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
          "AliasAnalysis query involving multiple functions!");
 
-  const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
+  const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
 
   // If this is a tail call and Loc.Ptr points to a stack location, we know that
   // the tail call cannot access or modify the local stack.
@@ -801,7 +813,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       // LLVM's vld1 and vst1 intrinsics currently only support a single
       // vector register.
       uint64_t Size =
-        TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize;
+        DL ? DL->getTypeStoreSize(II->getType()) : UnknownSize;
       if (isNoAlias(Location(II->getArgOperand(0), Size,
                              II->getMetadata(LLVMContext::MD_tbaa)),
                     Loc))
@@ -810,7 +822,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
     }
     case Intrinsic::arm_neon_vst1: {
       uint64_t Size =
-        TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
+        DL ? DL->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
       if (isNoAlias(Location(II->getArgOperand(0), Size,
                              II->getMetadata(LLVMContext::MD_tbaa)),
                     Loc))
@@ -873,7 +885,7 @@ static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1,
 
 /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
 /// against another pointer.  We know that V1 is a GEP, but we don't know
-/// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, TD),
+/// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
 /// UnderlyingV2 is the same for V2.
 ///
 AliasAnalysis::AliasResult
@@ -884,6 +896,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
                              const Value *UnderlyingV1,
                              const Value *UnderlyingV2) {
   int64_t GEP1BaseOffset;
+  bool GEP1MaxLookupReached;
   SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
 
   // If we have two gep instructions with must-alias or not-alias'ing base
@@ -905,18 +918,25 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
         // See if the computed offset from the common pointer tells us about the
         // relation of the resulting pointer.
         int64_t GEP2BaseOffset;
+        bool GEP2MaxLookupReached;
         SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
         const Value *GEP2BasePtr =
-          DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+          DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+                                 GEP2MaxLookupReached, DL);
         const Value *GEP1BasePtr =
-          DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+          DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+                                 GEP1MaxLookupReached, DL);
         // DecomposeGEPExpression and GetUnderlyingObject should return the
         // same result except when DecomposeGEPExpression has no DataLayout.
         if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-          assert(TD == 0 &&
+          assert(DL == 0 &&
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
           return MayAlias;
         }
+        // If the max search depth is reached the result is undefined
+        if (GEP2MaxLookupReached || GEP1MaxLookupReached)
+          return MayAlias;
+
         // Same offsets.
         if (GEP1BaseOffset == GEP2BaseOffset &&
             areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices))
@@ -933,20 +953,26 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     // exactly, see if the computed offset from the common pointer tells us
     // about the relation of the resulting pointer.
     const Value *GEP1BasePtr =
-      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+                             GEP1MaxLookupReached, DL);
 
     int64_t GEP2BaseOffset;
+    bool GEP2MaxLookupReached;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
-      DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
+      DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
+                             GEP2MaxLookupReached, DL);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-      assert(TD == 0 &&
+      assert(DL == 0 &&
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
+    // If the max search depth is reached the result is undefined
+    if (GEP2MaxLookupReached || GEP1MaxLookupReached)
+      return MayAlias;
 
     // Subtract the GEP2 pointer from the GEP1 pointer to find out their
     // symbolic difference.
@@ -973,15 +999,19 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       return R;
 
     const Value *GEP1BasePtr =
-      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
+      DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
+                             GEP1MaxLookupReached, DL);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1) {
-      assert(TD == 0 &&
+      assert(DL == 0 &&
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
+    // If the max search depth is reached the result is undefined
+    if (GEP1MaxLookupReached)
+      return MayAlias;
   }
 
   // In the two GEP Case, if there is no difference in the offsets of the
@@ -1005,7 +1035,15 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
         return NoAlias;
       }
     } else {
-      if (V1Size != UnknownSize) {
+      // We have the situation where:
+      // +                +
+      // | BaseOffset     |
+      // ---------------->|
+      // |-->V1Size       |-------> V2Size
+      // GEP1             V2
+      // We need to know that V2Size is not unknown, otherwise we might have
+      // stripped a gep with negative index ('gep <ptr>, -1, ...).
+      if (V1Size != UnknownSize && V2Size != UnknownSize) {
         if (-(uint64_t)GEP1BaseOffset < V1Size)
           return PartialAlias;
         return NoAlias;
@@ -1094,6 +1132,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
                              const MDNode *PNTBAAInfo,
                              const Value *V2, uint64_t V2Size,
                              const MDNode *V2TBAAInfo) {
+  // Track phi nodes we have visited. We use this information when we determine
+  // value equivalence.
+  VisitedPhiBBs.insert(PN->getParent());
+
   // If the values are PHIs in the same block, we can do a more precise
   // as well as efficient check: just check for aliases between the values
   // on corresponding edges.
@@ -1187,14 +1229,20 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   V2 = V2->stripPointerCasts();
 
   // Are we checking for alias of the same value?
-  if (V1 == V2) return MustAlias;
+  // Because we look 'through' phi nodes we could look at "Value" pointers from
+  // different iterations. We must therefore make sure that this is not the
+  // case. The function isValueEqualInPotentialCycles ensures that this cannot
+  // happen by looking at the visited phi nodes and making sure they cannot
+  // reach the value.
+  if (isValueEqualInPotentialCycles(V1, V2))
+    return MustAlias;
 
   if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
     return NoAlias;  // Scalars cannot alias each other
 
   // Figure out what objects these things are pointing to if we can.
-  const Value *O1 = GetUnderlyingObject(V1, TD);
-  const Value *O2 = GetUnderlyingObject(V2, TD);
+  const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+  const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
 
   // Null values in the default address space don't point to any object, so they
   // don't alias any other pointer.
@@ -1243,9 +1291,9 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
 
   // If the size of one access is larger than the entire object on the other
   // side, then we know such behavior is undefined and can assume no alias.
-  if (TD)
-    if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
-        (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
+  if (DL)
+    if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *DL, *TLI)) ||
+        (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *DL, *TLI)))
       return NoAlias;
 
   // Check the cache before climbing up use-def chains. This also terminates
@@ -1297,9 +1345,9 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   // If both pointers are pointing into the same object and one of them
   // accesses is accessing the entire object, then the accesses must
   // overlap in some way.
-  if (TD && O1 == O2)
-    if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD, *TLI)) ||
-        (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD, *TLI)))
+  if (DL && O1 == O2)
+    if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *DL, *TLI)) ||
+        (V2Size != UnknownSize && isObjectSize(O2, V2Size, *DL, *TLI)))
       return AliasCache[Locs] = PartialAlias;
 
   AliasResult Result =
@@ -1307,3 +1355,73 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
                          Location(V2, V2Size, V2TBAAInfo));
   return AliasCache[Locs] = Result;
 }
+
+bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
+                                                       const Value *V2) {
+  if (V != V2)
+    return false;
+
+  const Instruction *Inst = dyn_cast<Instruction>(V);
+  if (!Inst)
+    return true;
+
+  if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
+    return false;
+
+  // Use dominance or loop info if available.
+  DominatorTreeWrapperPass *DTWP =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+  LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
+
+  // Make sure that the visited phis cannot reach the Value. This ensures that
+  // the Values cannot come from different iterations of a potential cycle the
+  // phi nodes could be involved in.
+  for (SmallPtrSet<const BasicBlock *, 8>::iterator PI = VisitedPhiBBs.begin(),
+                                                    PE = VisitedPhiBBs.end();
+       PI != PE; ++PI)
+    if (isPotentiallyReachable((*PI)->begin(), Inst, DT, LI))
+      return false;
+
+  return true;
+}
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+void BasicAliasAnalysis::GetIndexDifference(
+    SmallVectorImpl<VariableGEPIndex> &Dest,
+    const SmallVectorImpl<VariableGEPIndex> &Src) {
+  if (Src.empty())
+    return;
+
+  for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+    const Value *V = Src[i].V;
+    ExtensionKind Extension = Src[i].Extension;
+    int64_t Scale = Src[i].Scale;
+
+    // Find V in Dest.  This is N^2, but pointer indices almost never have more
+    // than a few variable indexes.
+    for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+      if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
+          Dest[j].Extension != Extension)
+        continue;
+
+      // If we found it, subtract off Scale V's from the entry in Dest.  If it
+      // goes to zero, remove the entry.
+      if (Dest[j].Scale != Scale)
+        Dest[j].Scale -= Scale;
+      else
+        Dest.erase(Dest.begin() + j);
+      Scale = 0;
+      break;
+    }
+
+    // If we didn't consume this entry, add it to the end of the Dest list.
+    if (Scale) {
+      VariableGEPIndex Entry = { V, Extension, -Scale };
+      Dest.push_back(Entry);
+    }
+  }
+}
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 62f3ab1..63049a5 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -16,8 +16,8 @@
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
@@ -86,7 +86,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
     OS << Node->getName().str() << ":";
     switch (ViewBlockFreqPropagationDAG) {
     case GVDT_Fraction:
-      Graph->getBlockFreq(Node).print(OS);
+      Graph->printBlockFreq(OS, Node);
       break;
     case GVDT_Integer:
       OS << Graph->getBlockFreq(Node).getFrequency();
@@ -114,12 +114,9 @@ char BlockFrequencyInfo::ID = 0;
 
 BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) {
   initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
-  BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
 }
 
-BlockFrequencyInfo::~BlockFrequencyInfo() {
-  delete BFI;
-}
+BlockFrequencyInfo::~BlockFrequencyInfo() {}
 
 void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<BranchProbabilityInfo>();
@@ -128,6 +125,8 @@ void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
 
 bool BlockFrequencyInfo::runOnFunction(Function &F) {
   BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+  if (!BFI)
+    BFI.reset(new ImplType);
   BFI->doFunction(&F, &BPI);
 #ifndef NDEBUG
   if (ViewBlockFreqPropagationDAG != GVDT_None)
@@ -136,12 +135,14 @@ bool BlockFrequencyInfo::runOnFunction(Function &F) {
   return false;
 }
 
+void BlockFrequencyInfo::releaseMemory() { BFI.reset(); }
+
 void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
   if (BFI) BFI->print(O);
 }
 
 BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
-  return BFI->getBlockFreq(BB);
+  return BFI ? BFI->getBlockFreq(BB) : 0;
 }
 
 /// Pop up a ghostview window with the current block frequency propagation
@@ -157,5 +158,20 @@ void BlockFrequencyInfo::view() const {
 }
 
 const Function *BlockFrequencyInfo::getFunction() const {
-  return BFI->Fn;
+  return BFI ? BFI->Fn : nullptr;
+}
+
+raw_ostream &BlockFrequencyInfo::
+printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const {
+  return BFI ? BFI->printBlockFreq(OS, Freq) : OS;
+}
+
+raw_ostream &
+BlockFrequencyInfo::printBlockFreq(raw_ostream &OS,
+                                   const BasicBlock *BB) const {
+  return BFI ? BFI->printBlockFreq(OS, BB) : OS;
+}
+
+uint64_t BlockFrequencyInfo::getEntryFreq() const {
+  return BFI ? BFI->getEntryFreq() : 0;
 }
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 86560ca..b901c54 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -11,15 +11,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "branch-prob"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -483,6 +484,8 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 bool BranchProbabilityInfo::runOnFunction(Function &F) {
+  DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName()
+               << " ----\n\n");
   LastF = &F; // Store the last function we ran on for printing.
   LI = &getAnalysis<LoopInfo>();
   assert(PostDominatedByUnreachable.empty());
@@ -591,6 +594,13 @@ getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const {
   return DEFAULT_WEIGHT;
 }
 
+uint32_t
+BranchProbabilityInfo::
+getEdgeWeight(const BasicBlock *Src, succ_const_iterator Dst) const {
+  size_t index = std::distance(succ_begin(Src), Dst);
+  return getEdgeWeight(Src, index);
+}
+
 /// Get the raw edge weight calculated for the block pair. This returns the sum
 /// of all raw edge weights from Src to Dst.
 uint32_t BranchProbabilityInfo::
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index c3f32d3..6963760 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -13,10 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CFG.h"
-
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/Dominators.h"
 
 using namespace llvm;
 
@@ -102,15 +101,9 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
 
   // If AllowIdenticalEdges is true, then we allow this edge to be considered
   // non-critical iff all preds come from TI's block.
-  while (I != E) {
-    const BasicBlock *P = *I;
-    if (P != FirstPred)
+  for (; I != E; ++I)
+    if (*I != FirstPred)
       return true;
-    // Note: leave this as is until no one ever compiles with either gcc 4.0.1
-    // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
-    E = pred_end(P);
-    ++I;
-  }
   return false;
 }
 
@@ -169,8 +162,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
       // ignoring any other blocks inside the loop body.
       Outer->getExitBlocks(Worklist);
     } else {
-      for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
-        Worklist.push_back(*I);
+      Worklist.append(succ_begin(BB), succ_end(BB));
     }
   } while (!Worklist.empty());
 
@@ -223,8 +215,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
       return false;
 
     // Otherwise, continue doing the normal per-BB CFG walk.
-    for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
-      Worklist.push_back(*I);
+    Worklist.append(succ_begin(BB), succ_end(BB));
 
     if (Worklist.empty()) {
       // We've proven that there's no path!
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 9b6879a..537d6d1 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -28,14 +28,14 @@ namespace {
       initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnFunction(Function &F) {
+    bool runOnFunction(Function &F) override {
       F.viewCFG();
       return false;
     }
 
-    void print(raw_ostream &OS, const Module* = 0) const {}
+    void print(raw_ostream &OS, const Module* = 0) const override {}
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
     }
   };
@@ -51,14 +51,14 @@ namespace {
       initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnFunction(Function &F) {
+    bool runOnFunction(Function &F) override {
       F.viewCFGOnly();
       return false;
     }
 
-    void print(raw_ostream &OS, const Module* = 0) const {}
+    void print(raw_ostream &OS, const Module* = 0) const override {}
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
     }
   };
@@ -75,12 +75,12 @@ namespace {
       initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnFunction(Function &F) {
+    bool runOnFunction(Function &F) override {
       std::string Filename = "cfg." + F.getName().str() + ".dot";
       errs() << "Writing '" << Filename << "'...";
       
       std::string ErrorInfo;
-      raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+      raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text);
 
       if (ErrorInfo.empty())
         WriteGraph(File, (const Function*)&F);
@@ -90,9 +90,9 @@ namespace {
       return false;
     }
 
-    void print(raw_ostream &OS, const Module* = 0) const {}
+    void print(raw_ostream &OS, const Module* = 0) const override {}
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
     }
   };
@@ -108,13 +108,13 @@ namespace {
     CFGOnlyPrinter() : FunctionPass(ID) {
       initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
     }
-    
-    virtual bool runOnFunction(Function &F) {
+
+    bool runOnFunction(Function &F) override {
       std::string Filename = "cfg." + F.getName().str() + ".dot";
       errs() << "Writing '" << Filename << "'...";
 
       std::string ErrorInfo;
-      raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+      raw_fd_ostream File(Filename.c_str(), ErrorInfo, sys::fs::F_Text);
       
       if (ErrorInfo.empty())
         WriteGraph(File, (const Function*)&F, true);
@@ -123,9 +123,9 @@ namespace {
       errs() << "\n";
       return false;
     }
-    void print(raw_ostream &OS, const Module* = 0) const {}
+    void print(raw_ostream &OS, const Module* = 0) const override {}
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
     }
   };
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 3624aac..c6d4573 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_library(LLVMAnalysis
   InstructionSimplify.cpp
   Interval.cpp
   IntervalPartition.cpp
+  LazyCallGraph.cpp
   LazyValueInfo.cpp
   LibCallAliasAnalysis.cpp
   LibCallSemantics.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 79fab1b..3708e60 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -20,24 +20,24 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/Support/CallSite.h"
 
 using namespace llvm;
 
 CaptureTracker::~CaptureTracker() {}
 
-bool CaptureTracker::shouldExplore(Use *U) { return true; }
+bool CaptureTracker::shouldExplore(const Use *U) { return true; }
 
 namespace {
   struct SimpleCaptureTracker : public CaptureTracker {
     explicit SimpleCaptureTracker(bool ReturnCaptures)
       : ReturnCaptures(ReturnCaptures), Captured(false) {}
 
-    void tooManyUses() { Captured = true; }
+    void tooManyUses() override { Captured = true; }
 
-    bool captured(Use *U) {
+    bool captured(const Use *U) override {
       if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
         return false;
 
@@ -81,25 +81,23 @@ static int const Threshold = 20;
 
 void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
   assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
-  SmallVector<Use*, Threshold> Worklist;
-  SmallSet<Use*, Threshold> Visited;
+  SmallVector<const Use *, Threshold> Worklist;
+  SmallSet<const Use *, Threshold> Visited;
   int Count = 0;
 
-  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
-       UI != UE; ++UI) {
+  for (const Use &U : V->uses()) {
     // If there are lots of uses, conservatively say that the value
     // is captured to avoid taking too much compile time.
     if (Count++ >= Threshold)
       return Tracker->tooManyUses();
 
-    Use *U = &UI.getUse();
-    if (!Tracker->shouldExplore(U)) continue;
-    Visited.insert(U);
-    Worklist.push_back(U);
+    if (!Tracker->shouldExplore(&U)) continue;
+    Visited.insert(&U);
+    Worklist.push_back(&U);
   }
 
   while (!Worklist.empty()) {
-    Use *U = Worklist.pop_back_val();
+    const Use *U = Worklist.pop_back_val();
     Instruction *I = cast<Instruction>(U->getUser());
     V = U->get();
 
@@ -145,19 +143,18 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
     case Instruction::GetElementPtr:
     case Instruction::PHI:
     case Instruction::Select:
+    case Instruction::AddrSpaceCast:
       // The original value is not captured via this if the new value isn't.
       Count = 0;
-      for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
-           UI != UE; ++UI) {
+      for (Use &UU : I->uses()) {
         // If there are lots of uses, conservatively say that the value
         // is captured to avoid taking too much compile time.
         if (Count++ >= Threshold)
           return Tracker->tooManyUses();
 
-        Use *U = &UI.getUse();
-        if (Visited.insert(U))
-          if (Tracker->shouldExplore(U))
-            Worklist.push_back(U);
+        if (Visited.insert(&UU))
+          if (Tracker->shouldExplore(&UU))
+            Worklist.push_back(&UU);
       }
       break;
     case Instruction::ICmp:
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 8cda01a..4c8a093 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -13,10 +13,10 @@
 
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CallSite.h"
 
 using namespace llvm;
 
@@ -65,11 +65,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
       ++NumVectorInsts;
 
     if (const CallInst *CI = dyn_cast<CallInst>(II))
-      if (CI->hasFnAttr(Attribute::NoDuplicate))
+      if (CI->cannotDuplicate())
         notDuplicatable = true;
 
     if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II))
-      if (InvI->hasFnAttr(Attribute::NoDuplicate))
+      if (InvI->cannotDuplicate())
         notDuplicatable = true;
 
     NumInsts += TTI.getUserCost(&*II);
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 3d32232..782acfa 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -25,13 +25,13 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FEnv.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include <cerrno>
@@ -1186,6 +1186,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::exp:
   case Intrinsic::exp2:
   case Intrinsic::floor:
+  case Intrinsic::ceil:
   case Intrinsic::sqrt:
   case Intrinsic::pow:
   case Intrinsic::powi:
@@ -1193,6 +1194,10 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::ctpop:
   case Intrinsic::ctlz:
   case Intrinsic::cttz:
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd:
+  case Intrinsic::copysign:
+  case Intrinsic::round:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
@@ -1244,15 +1249,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   }
 }
 
-static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
-                                Type *Ty) {
-  sys::llvm_fenv_clearexcept();
-  V = NativeFP(V);
-  if (sys::llvm_fenv_testexcept()) {
-    sys::llvm_fenv_clearexcept();
-    return 0;
-  }
-
+static Constant *GetConstantFoldFPValue(double V, Type *Ty) {
   if (Ty->isHalfTy()) {
     APFloat APF(V);
     bool unused;
@@ -1264,6 +1261,19 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
   if (Ty->isDoubleTy())
     return ConstantFP::get(Ty->getContext(), APFloat(V));
   llvm_unreachable("Can only constant fold half/float/double");
+
+}
+
+static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
+                                Type *Ty) {
+  sys::llvm_fenv_clearexcept();
+  V = NativeFP(V);
+  if (sys::llvm_fenv_testexcept()) {
+    sys::llvm_fenv_clearexcept();
+    return 0;
+  }
+
+  return GetConstantFoldFPValue(V, Ty);
 }
 
 static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
@@ -1275,17 +1285,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
     return 0;
   }
 
-  if (Ty->isHalfTy()) {
-    APFloat APF(V);
-    bool unused;
-    APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
-    return ConstantFP::get(Ty->getContext(), APF);
-  }
-  if (Ty->isFloatTy())
-    return ConstantFP::get(Ty->getContext(), APFloat((float)V));
-  if (Ty->isDoubleTy())
-    return ConstantFP::get(Ty->getContext(), APFloat(V));
-  llvm_unreachable("Can only constant fold half/float/double");
+  return GetConstantFoldFPValue(V, Ty);
 }
 
 /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer
@@ -1315,32 +1315,44 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val,
   return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
 }
 
-/// ConstantFoldCall - Attempt to constant fold a call to the specified function
-/// with the specified arguments, returning null if unsuccessful.
-Constant *
-llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
-                       const TargetLibraryInfo *TLI) {
-  if (!F->hasName())
-    return 0;
-  StringRef Name = F->getName();
+static double getValueAsDouble(ConstantFP *Op) {
+  Type *Ty = Op->getType();
 
-  Type *Ty = F->getReturnType();
+  if (Ty->isFloatTy())
+    return Op->getValueAPF().convertToFloat();
+
+  if (Ty->isDoubleTy())
+    return Op->getValueAPF().convertToDouble();
+
+  bool unused;
+  APFloat APF = Op->getValueAPF();
+  APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+  return APF.convertToDouble();
+}
+
+static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
+                                        Type *Ty, ArrayRef<Constant *> Operands,
+                                        const TargetLibraryInfo *TLI) {
   if (Operands.size() == 1) {
     if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
-      if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
+      if (IntrinsicID == Intrinsic::convert_to_fp16) {
         APFloat Val(Op->getValueAPF());
 
         bool lost = false;
         Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
 
-        return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
+        return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
       }
-      if (!TLI)
-        return 0;
 
       if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
         return 0;
 
+      if (IntrinsicID == Intrinsic::round) {
+        APFloat V = Op->getValueAPF();
+        V.roundToIntegral(APFloat::rmNearestTiesToAway);
+        return ConstantFP::get(Ty->getContext(), V);
+      }
+
       /// We only fold functions with finite arguments. Folding NaN and inf is
       /// likely to be aborted with an exception anyway, and some host libms
       /// have known errors raising exceptions.
@@ -1351,19 +1363,9 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
       /// the host native double versions.  Float versions are not called
       /// directly but for all these it is true (float)(f((double)arg)) ==
       /// f(arg).  Long double not supported yet.
-      double V;
-      if (Ty->isFloatTy())
-        V = Op->getValueAPF().convertToFloat();
-      else if (Ty->isDoubleTy())
-        V = Op->getValueAPF().convertToDouble();
-      else {
-        bool unused;
-        APFloat APF = Op->getValueAPF();
-        APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
-        V = APF.convertToDouble();
-      }
+      double V = getValueAsDouble(Op);
 
-      switch (F->getIntrinsicID()) {
+      switch (IntrinsicID) {
         default: break;
         case Intrinsic::fabs:
           return ConstantFoldFP(fabs, V, Ty);
@@ -1389,8 +1391,13 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
 #endif
         case Intrinsic::floor:
           return ConstantFoldFP(floor, V, Ty);
+        case Intrinsic::ceil:
+          return ConstantFoldFP(ceil, V, Ty);
       }
 
+      if (!TLI)
+        return 0;
+
       switch (Name[0]) {
       case 'a':
         if (Name == "acos" && TLI->has(LibFunc::acos))
@@ -1431,7 +1438,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
           return ConstantFoldFP(log, V, Ty);
         else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
           return ConstantFoldFP(log10, V, Ty);
-        else if (F->getIntrinsicID() == Intrinsic::sqrt &&
+        else if (IntrinsicID == Intrinsic::sqrt &&
                  (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) {
           if (V >= -0.0)
             return ConstantFoldFP(sqrt, V, Ty);
@@ -1464,9 +1471,9 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
     }
 
     if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
-      switch (F->getIntrinsicID()) {
+      switch (IntrinsicID) {
       case Intrinsic::bswap:
-        return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
+        return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
       case Intrinsic::ctpop:
         return ConstantInt::get(Ty, Op->getValue().countPopulation());
       case Intrinsic::convert_from_fp16: {
@@ -1481,7 +1488,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
         assert(status == APFloat::opOK && !lost &&
                "Precision lost during fp16 constfolding");
 
-        return ConstantFP::get(F->getContext(), Val);
+        return ConstantFP::get(Ty->getContext(), Val);
       }
       default:
         return 0;
@@ -1492,7 +1499,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
     if (isa<ConstantVector>(Operands[0]) ||
         isa<ConstantDataVector>(Operands[0])) {
       Constant *Op = cast<Constant>(Operands[0]);
-      switch (F->getIntrinsicID()) {
+      switch (IntrinsicID) {
       default: break;
       case Intrinsic::x86_sse_cvtss2si:
       case Intrinsic::x86_sse_cvtss2si64:
@@ -1514,7 +1521,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
     }
 
     if (isa<UndefValue>(Operands[0])) {
-      if (F->getIntrinsicID() == Intrinsic::bswap)
+      if (IntrinsicID == Intrinsic::bswap)
         return Operands[0];
       return 0;
     }
@@ -1526,37 +1533,22 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
     if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
       if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
         return 0;
-      double Op1V;
-      if (Ty->isFloatTy())
-        Op1V = Op1->getValueAPF().convertToFloat();
-      else if (Ty->isDoubleTy())
-        Op1V = Op1->getValueAPF().convertToDouble();
-      else {
-        bool unused;
-        APFloat APF = Op1->getValueAPF();
-        APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
-        Op1V = APF.convertToDouble();
-      }
+      double Op1V = getValueAsDouble(Op1);
 
       if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
         if (Op2->getType() != Op1->getType())
           return 0;
 
-        double Op2V;
-        if (Ty->isFloatTy())
-          Op2V = Op2->getValueAPF().convertToFloat();
-        else if (Ty->isDoubleTy())
-          Op2V = Op2->getValueAPF().convertToDouble();
-        else {
-          bool unused;
-          APFloat APF = Op2->getValueAPF();
-          APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
-          Op2V = APF.convertToDouble();
-        }
-
-        if (F->getIntrinsicID() == Intrinsic::pow) {
+        double Op2V = getValueAsDouble(Op2);
+        if (IntrinsicID == Intrinsic::pow) {
           return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
         }
+        if (IntrinsicID == Intrinsic::copysign) {
+          APFloat V1 = Op1->getValueAPF();
+          APFloat V2 = Op2->getValueAPF();
+          V1.copySign(V2);
+          return ConstantFP::get(Ty->getContext(), V1);
+        }
         if (!TLI)
           return 0;
         if (Name == "pow" && TLI->has(LibFunc::pow))
@@ -1566,16 +1558,16 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
         if (Name == "atan2" && TLI->has(LibFunc::atan2))
           return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
       } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
-        if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy())
-          return ConstantFP::get(F->getContext(),
+        if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
+          return ConstantFP::get(Ty->getContext(),
                                  APFloat((float)std::pow((float)Op1V,
                                                  (int)Op2C->getZExtValue())));
-        if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
-          return ConstantFP::get(F->getContext(),
+        if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
+          return ConstantFP::get(Ty->getContext(),
                                  APFloat((float)std::pow((float)Op1V,
                                                  (int)Op2C->getZExtValue())));
-        if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy())
-          return ConstantFP::get(F->getContext(),
+        if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
+          return ConstantFP::get(Ty->getContext(),
                                  APFloat((double)std::pow((double)Op1V,
                                                    (int)Op2C->getZExtValue())));
       }
@@ -1584,7 +1576,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
 
     if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
       if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
-        switch (F->getIntrinsicID()) {
+        switch (IntrinsicID) {
         default: break;
         case Intrinsic::sadd_with_overflow:
         case Intrinsic::uadd_with_overflow:
@@ -1594,7 +1586,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
         case Intrinsic::umul_with_overflow: {
           APInt Res;
           bool Overflow;
-          switch (F->getIntrinsicID()) {
+          switch (IntrinsicID) {
           default: llvm_unreachable("Invalid case");
           case Intrinsic::sadd_with_overflow:
             Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
@@ -1616,10 +1608,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
             break;
           }
           Constant *Ops[] = {
-            ConstantInt::get(F->getContext(), Res),
-            ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
+            ConstantInt::get(Ty->getContext(), Res),
+            ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
           };
-          return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
+          return ConstantStruct::get(cast<StructType>(Ty), Ops);
         }
         case Intrinsic::cttz:
           if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef.
@@ -1636,5 +1628,75 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
     }
     return 0;
   }
+
+  if (Operands.size() != 3)
+    return 0;
+
+  if (const ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+    if (const ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+      if (const ConstantFP *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
+        switch (IntrinsicID) {
+        default: break;
+        case Intrinsic::fma:
+        case Intrinsic::fmuladd: {
+          APFloat V = Op1->getValueAPF();
+          APFloat::opStatus s = V.fusedMultiplyAdd(Op2->getValueAPF(),
+                                                   Op3->getValueAPF(),
+                                                   APFloat::rmNearestTiesToEven);
+          if (s != APFloat::opInvalidOp)
+            return ConstantFP::get(Ty->getContext(), V);
+
+          return 0;
+        }
+        }
+      }
+    }
+  }
+
   return 0;
 }
+
+static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
+                                        VectorType *VTy,
+                                        ArrayRef<Constant *> Operands,
+                                        const TargetLibraryInfo *TLI) {
+  SmallVector<Constant *, 4> Result(VTy->getNumElements());
+  SmallVector<Constant *, 4> Lane(Operands.size());
+  Type *Ty = VTy->getElementType();
+
+  for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
+    // Gather a column of constants.
+    for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
+      Constant *Agg = Operands[J]->getAggregateElement(I);
+      if (!Agg)
+        return nullptr;
+
+      Lane[J] = Agg;
+    }
+
+    // Use the regular scalar folding to simplify this column.
+    Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI);
+    if (!Folded)
+      return nullptr;
+    Result[I] = Folded;
+  }
+
+  return ConstantVector::get(Result);
+}
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
+Constant *
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+                       const TargetLibraryInfo *TLI) {
+  if (!F->hasName())
+    return 0;
+  StringRef Name = F->getName();
+
+  Type *Ty = F->getReturnType();
+
+  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+    return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands, TLI);
+
+  return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI);
+}
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index f943258..b49211d 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -53,9 +53,9 @@ namespace {
     unsigned getInstructionCost(const Instruction *I) const;
 
   private:
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual bool runOnFunction(Function &F);
-    virtual void print(raw_ostream &OS, const Module*) const;
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
+    bool runOnFunction(Function &F) override;
+    void print(raw_ostream &OS, const Module*) const override;
 
     /// The function that we analyze.
     Function *F;
@@ -98,15 +98,12 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
   TargetTransformInfo::OperandValueKind OpInfo =
     TargetTransformInfo::OK_AnyValue;
 
-  // Check for a splat of a constant.
-  ConstantDataVector *CDV = 0;
-  if ((CDV = dyn_cast<ConstantDataVector>(V)))
-    if (CDV->getSplatValue() != NULL)
-      OpInfo = TargetTransformInfo::OK_UniformConstantValue;
-  ConstantVector *CV = 0;
-  if ((CV = dyn_cast<ConstantVector>(V)))
-    if (CV->getSplatValue() != NULL)
+  // Check for a splat of a constant or for a non uniform vector of constants.
+  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+    OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
+    if (cast<Constant>(V)->getSplatValue() != NULL)
       OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+  }
 
   return OpInfo;
 }
@@ -337,7 +334,7 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
 
     Value *NextRdxOp;
     ShuffleVectorInst *Shuffle;
-    tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
+    std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
 
     // Check the current reduction operation and the shuffle use the same value.
     if (Shuffle == 0)
@@ -439,7 +436,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
   case Instruction::UIToFP:
   case Instruction::Trunc:
   case Instruction::FPTrunc:
-  case Instruction::BitCast: {
+  case Instruction::BitCast:
+  case Instruction::AddrSpaceCast: {
     Type *SrcTy = I->getOperand(0)->getType();
     return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
   }
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index 3ed0609..fd4a2f0 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -17,19 +17,19 @@
 #define DL_NAME "delinearize"
 #define DEBUG_TYPE DL_NAME
 #include "llvm/IR/Constants.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/Pass.h"
 #include "llvm/IR/Type.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -49,9 +49,9 @@ public:
   Delinearization() : FunctionPass(ID) {
     initializeDelinearizationPass(*PassRegistry::getPassRegistry());
   }
-  virtual bool runOnFunction(Function &F);
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-  virtual void print(raw_ostream &O, const Module *M = 0) const;
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  void print(raw_ostream &O, const Module *M = 0) const override;
 };
 
 } // end anonymous namespace
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 3b3e2ef..ff98611 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -60,11 +60,11 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -1275,8 +1275,8 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
 //
 // Program 2.1, page 29.
 // Computes the GCD of AM and BM.
-// Also finds a solution to the equation ax - by = gdc(a, b).
-// Returns true iff the gcd divides Delta.
+// Also finds a solution to the equation ax - by = gcd(a, b).
+// Returns true if dependence disproved; i.e., gcd does not divide Delta.
 static
 bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta,
              APInt &G, APInt &X, APInt &Y) {
@@ -3178,7 +3178,7 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
 
 /// Check if we can delinearize the subscripts. If the SCEVs representing the
 /// source and destination array references are recurrences on a nested loop,
-/// this function flattens the nested recurrences into seperate recurrences
+/// this function flattens the nested recurrences into separate recurrences
 /// for each loop level.
 bool
 DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
@@ -3189,14 +3189,42 @@ DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
     return false;
 
   SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
-  SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
-  DstAR->delinearize(*SE, DstSubscripts, DstSizes);
+  const SCEV *RemainderS = SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
+  const SCEV *RemainderD = DstAR->delinearize(*SE, DstSubscripts, DstSizes);
 
   int size = SrcSubscripts.size();
+  // Fail when there is only a subscript: that's a linearized access function.
+  if (size < 2)
+    return false;
+
   int dstSize = DstSubscripts.size();
-  if (size != dstSize || size < 2)
+  // Fail when the number of subscripts in Src and Dst differ.
+  if (size != dstSize)
     return false;
 
+  // Fail when the size of any of the subscripts in Src and Dst differs: the
+  // dependence analysis assumes that elements in the same array have same size.
+  // SCEV delinearization does not have a context based on which it would decide
+  // globally the size of subscripts that would best fit all the array accesses.
+  for (int i = 0; i < size; ++i)
+    if (SrcSizes[i] != DstSizes[i])
+      return false;
+
+  // When the difference in remainders is different than a constant it might be
+  // that the base address of the arrays is not the same.
+  const SCEV *DiffRemainders = SE->getMinusSCEV(RemainderS, RemainderD);
+  if (!isa<SCEVConstant>(DiffRemainders))
+    return false;
+
+  // Normalize the last dimension: integrate the size of the "scalar dimension"
+  // and the remainder of the delinearization.
+  DstSubscripts[size-1] = SE->getMulExpr(DstSubscripts[size-1],
+                                         DstSizes[size-1]);
+  SrcSubscripts[size-1] = SE->getMulExpr(SrcSubscripts[size-1],
+                                         SrcSizes[size-1]);
+  SrcSubscripts[size-1] = SE->getAddExpr(SrcSubscripts[size-1], RemainderS);
+  DstSubscripts[size-1] = SE->getAddExpr(DstSubscripts[size-1], RemainderD);
+
 #ifndef NDEBUG
   DEBUG(errs() << "\nSrcSubscripts: ");
   for (int i = 0; i < size; i++)
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index cde4314..0c880df 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -81,18 +81,32 @@ struct DOTGraphTraits<PostDominatorTree*>
 }
 
 namespace {
-struct DomViewer
-  : public DOTGraphTraitsViewer<DominatorTree, false> {
+struct DominatorTreeWrapperPassAnalysisGraphTraits {
+  static DominatorTree *getGraph(DominatorTreeWrapperPass *DTWP) {
+    return &DTWP->getDomTree();
+  }
+};
+
+struct DomViewer : public DOTGraphTraitsViewer<
+                       DominatorTreeWrapperPass, false, DominatorTree *,
+                       DominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
+  DomViewer()
+      : DOTGraphTraitsViewer<DominatorTreeWrapperPass, false, DominatorTree *,
+                             DominatorTreeWrapperPassAnalysisGraphTraits>(
+            "dom", ID) {
     initializeDomViewerPass(*PassRegistry::getPassRegistry());
   }
 };
 
-struct DomOnlyViewer
-  : public DOTGraphTraitsViewer<DominatorTree, true> {
+struct DomOnlyViewer : public DOTGraphTraitsViewer<
+                           DominatorTreeWrapperPass, true, DominatorTree *,
+                           DominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
+  DomOnlyViewer()
+      : DOTGraphTraitsViewer<DominatorTreeWrapperPass, true, DominatorTree *,
+                             DominatorTreeWrapperPassAnalysisGraphTraits>(
+            "domonly", ID) {
     initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
   }
 };
@@ -136,18 +150,26 @@ INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
                 false, false)
 
 namespace {
-struct DomPrinter
-  : public DOTGraphTraitsPrinter<DominatorTree, false> {
+struct DomPrinter : public DOTGraphTraitsPrinter<
+                        DominatorTreeWrapperPass, false, DominatorTree *,
+                        DominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
+  DomPrinter()
+      : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, false, DominatorTree *,
+                              DominatorTreeWrapperPassAnalysisGraphTraits>(
+            "dom", ID) {
     initializeDomPrinterPass(*PassRegistry::getPassRegistry());
   }
 };
 
-struct DomOnlyPrinter
-  : public DOTGraphTraitsPrinter<DominatorTree, true> {
+struct DomOnlyPrinter : public DOTGraphTraitsPrinter<
+                            DominatorTreeWrapperPass, true, DominatorTree *,
+                            DominatorTreeWrapperPassAnalysisGraphTraits> {
   static char ID;
-  DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
+  DomOnlyPrinter()
+      : DOTGraphTraitsPrinter<DominatorTreeWrapperPass, true, DominatorTree *,
+                              DominatorTreeWrapperPassAnalysisGraphTraits>(
+            "domonly", ID) {
     initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
   }
 };
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 7e4a89f..f0787f1 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -9,7 +9,6 @@
 
 #include "llvm/Analysis/DominanceFrontier.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -17,7 +16,7 @@ using namespace llvm;
 char DominanceFrontier::ID = 0;
 INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
                 "Dominance Frontier Construction", true, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
                 "Dominance Frontier Construction", true, true)
 
@@ -114,7 +113,7 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
   for (const_iterator I = begin(), E = end(); I != E; ++I) {
     OS << "  DomFrontier for BB ";
     if (I->first)
-      WriteAsOperand(OS, I->first, false);
+      I->first->printAsOperand(OS, false);
     else
       OS << " <<exit node>>";
     OS << " is:\t";
@@ -125,7 +124,7 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
          I != E; ++I) {
       OS << ' ';
       if (*I)
-        WriteAsOperand(OS, *I, false);
+        (*I)->printAsOperand(OS, false);
       else
         OS << "<<exit node>>";
     }
diff --git a/lib/Analysis/IPA/Android.mk b/lib/Analysis/IPA/Android.mk
index ff8a4ae..d56d931 100644
--- a/lib/Analysis/IPA/Android.mk
+++ b/lib/Analysis/IPA/Android.mk
@@ -25,6 +25,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
 
 # For the device
 # =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
 include $(CLEAR_VARS)
 
 LOCAL_SRC_FILES := $(analysis_ipa_SRC_FILES)
@@ -36,3 +37,4 @@ LOCAL_MODULE_TAGS := optional
 include $(LLVM_DEVICE_BUILD_MK)
 include $(LLVM_GEN_INTRINSICS_MK)
 include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index f042964..f43675b 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -8,17 +8,45 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-CallGraph::CallGraph()
-    : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {
-  initializeCallGraphPass(*PassRegistry::getPassRegistry());
+//===----------------------------------------------------------------------===//
+// Implementations of the CallGraph class methods.
+//
+
+CallGraph::CallGraph(Module &M)
+    : M(M), Root(0), ExternalCallingNode(getOrInsertFunction(0)),
+      CallsExternalNode(new CallGraphNode(0)) {
+  // Add every function to the call graph.
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    addToCallGraph(I);
+
+  // If we didn't find a main function, use the external call graph node
+  if (Root == 0)
+    Root = ExternalCallingNode;
+}
+
+CallGraph::~CallGraph() {
+  // CallsExternalNode is not in the function map, delete it explicitly.
+  CallsExternalNode->allReferencesDropped();
+  delete CallsExternalNode;
+
+// Reset all node's use counts to zero before deleting them to prevent an
+// assertion from firing.
+#ifndef NDEBUG
+  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+       I != E; ++I)
+    I->second->allReferencesDropped();
+#endif
+  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+       I != E; ++I)
+    delete I->second;
 }
 
 void CallGraph::addToCallGraph(Function *F) {
@@ -62,59 +90,7 @@ void CallGraph::addToCallGraph(Function *F) {
     }
 }
 
-void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-}
-
-bool CallGraph::runOnModule(Module &M) {
-  Mod = &M;
-
-  ExternalCallingNode = getOrInsertFunction(0);
-  assert(!CallsExternalNode);
-  CallsExternalNode = new CallGraphNode(0);
-  Root = 0;
-
-  // Add every function to the call graph.
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
-    addToCallGraph(I);
-
-  // If we didn't find a main function, use the external call graph node
-  if (Root == 0)
-    Root = ExternalCallingNode;
-
-  return false;
-}
-
-INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true)
-
-char CallGraph::ID = 0;
-
-void CallGraph::releaseMemory() {
-  /// CallsExternalNode is not in the function map, delete it explicitly.
-  if (CallsExternalNode) {
-    CallsExternalNode->allReferencesDropped();
-    delete CallsExternalNode;
-    CallsExternalNode = 0;
-  }
-
-  if (FunctionMap.empty())
-    return;
-
-// Reset all node's use counts to zero before deleting them to prevent an
-// assertion from firing.
-#ifndef NDEBUG
-  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
-       I != E; ++I)
-    I->second->allReferencesDropped();
-#endif
-  
-  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
-      I != E; ++I)
-    delete I->second;
-  FunctionMap.clear();
-}
-
-void CallGraph::print(raw_ostream &OS, const Module*) const {
+void CallGraph::print(raw_ostream &OS) const {
   OS << "CallGraph Root is: ";
   if (Function *F = Root->getFunction())
     OS << F->getName() << "\n";
@@ -125,16 +101,11 @@ void CallGraph::print(raw_ostream &OS, const Module*) const {
   for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
     I->second->print(OS);
 }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void CallGraph::dump() const {
-  print(dbgs(), 0);
-}
+void CallGraph::dump() const { print(dbgs()); }
 #endif
 
-//===----------------------------------------------------------------------===//
-// Implementations of public modification methods
-//
-
 // removeFunctionFromModule - Unlink the function from this module, returning
 // it.  Because this removes the function from the module, the call graph node
 // is destroyed.  This is only valid if the function does not call any other
@@ -148,7 +119,7 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
   delete CGN;                       // Delete the call graph node for this func
   FunctionMap.erase(F);             // Remove the call graph node from the map
 
-  Mod->getFunctionList().remove(F);
+  M.getFunctionList().remove(F);
   return F;
 }
 
@@ -172,12 +143,17 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {
 // not already exist.
 CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
   CallGraphNode *&CGN = FunctionMap[F];
-  if (CGN) return CGN;
-  
-  assert((!F || F->getParent() == Mod) && "Function not in current module!");
+  if (CGN)
+    return CGN;
+
+  assert((!F || F->getParent() == &M) && "Function not in current module!");
   return CGN = new CallGraphNode(const_cast<Function*>(F));
 }
 
+//===----------------------------------------------------------------------===//
+// Implementations of the CallGraphNode class methods.
+//
+
 void CallGraphNode::print(raw_ostream &OS) const {
   if (Function *F = getFunction())
     OS << "Call graph node for function: '" << F->getName() << "'";
@@ -260,5 +236,52 @@ void CallGraphNode::replaceCallEdge(CallSite CS,
   }
 }
 
+//===----------------------------------------------------------------------===//
+// Out-of-line definitions of CallGraphAnalysis class members.
+//
+
+char CallGraphAnalysis::PassID;
+
+//===----------------------------------------------------------------------===//
+// Implementations of the CallGraphWrapperPass class methods.
+//
+
+CallGraphWrapperPass::CallGraphWrapperPass() : ModulePass(ID) {
+  initializeCallGraphWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+CallGraphWrapperPass::~CallGraphWrapperPass() {}
+
+void CallGraphWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+}
+
+bool CallGraphWrapperPass::runOnModule(Module &M) {
+  // All the real work is done in the constructor for the CallGraph.
+  G.reset(new CallGraph(M));
+  return false;
+}
+
+INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction",
+                false, true)
+
+char CallGraphWrapperPass::ID = 0;
+
+void CallGraphWrapperPass::releaseMemory() { G.reset(0); }
+
+void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
+  if (!G) {
+    OS << "No call graph has been built!\n";
+    return;
+  }
+
+  // Just delegate.
+  G->print(OS);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void CallGraphWrapperPass::dump() const { print(dbgs(), 0); }
+#endif
+
 // Enuse that users of CallGraph.h also link with this file
 DEFINING_FILE_FOR(CallGraph)
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 182beca..aafc085 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -49,7 +49,7 @@ public:
 
   /// run - Execute all of the passes scheduled for execution.  Keep track of
   /// whether any of the passes modifies the module, and if so, return true.
-  bool runOnModule(Module &M);
+  bool runOnModule(Module &M) override;
 
   using ModulePass::doInitialization;
   using ModulePass::doFinalization;
@@ -58,21 +58,21 @@ public:
   bool doFinalization(CallGraph &CG);
 
   /// Pass Manager itself does not invalidate any analysis info.
-  void getAnalysisUsage(AnalysisUsage &Info) const {
+  void getAnalysisUsage(AnalysisUsage &Info) const override {
     // CGPassManager walks SCC and it needs CallGraph.
-    Info.addRequired<CallGraph>();
+    Info.addRequired<CallGraphWrapperPass>();
     Info.setPreservesAll();
   }
 
-  virtual const char *getPassName() const {
+  const char *getPassName() const override {
     return "CallGraph Pass Manager";
   }
 
-  virtual PMDataManager *getAsPMDataManager() { return this; }
-  virtual Pass *getAsPass() { return this; }
+  PMDataManager *getAsPMDataManager() override { return this; }
+  Pass *getAsPass() override { return this; }
 
   // Print passes managed by this manager
-  void dumpPassStructure(unsigned Offset) {
+  void dumpPassStructure(unsigned Offset) override {
     errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       Pass *P = getContainedPass(Index);
@@ -86,7 +86,7 @@ public:
     return static_cast<Pass *>(PassVector[N]);
   }
 
-  virtual PassManagerType getPassManagerType() const { 
+  PassManagerType getPassManagerType() const override {
     return PMT_CallGraphPassManager; 
   }
   
@@ -424,7 +424,7 @@ bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
 /// run - Execute all of the passes scheduled for execution.  Keep track of
 /// whether any of the passes modifies the module, and if so, return true.
 bool CGPassManager::runOnModule(Module &M) {
-  CallGraph &CG = getAnalysis<CallGraph>();
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
   bool Changed = doInitialization(CG);
   
   // Walk the callgraph in bottom-up SCC order.
@@ -570,8 +570,8 @@ void CallGraphSCCPass::assignPassManager(PMStack &PMS,
 /// the call graph.  If the derived class implements this method, it should
 /// always explicitly call the implementation here.
 void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<CallGraph>();
-  AU.addPreserved<CallGraph>();
+  AU.addRequired<CallGraphWrapperPass>();
+  AU.addPreserved<CallGraphWrapperPass>();
 }
 
 
@@ -590,12 +590,12 @@ namespace {
     static char ID;
     PrintCallGraphPass(const std::string &B, raw_ostream &o)
       : CallGraphSCCPass(ID), Banner(B), Out(o) {}
-    
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
     }
-    
-    bool runOnSCC(CallGraphSCC &SCC) {
+
+    bool runOnSCC(CallGraphSCC &SCC) override {
       Out << Banner;
       for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
         (*I)->getFunction()->print(Out);
diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/IPA/CallPrinter.cpp
index 306ae7a..68dcd3c 100644
--- a/lib/Analysis/IPA/CallPrinter.cpp
+++ b/lib/Analysis/IPA/CallPrinter.cpp
@@ -22,13 +22,10 @@ using namespace llvm;
 
 namespace llvm {
 
-template<>
-struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
-  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+template <> struct DOTGraphTraits<CallGraph *> : public DefaultDOTGraphTraits {
+  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
 
-  static std::string getGraphName(CallGraph *Graph) {
-    return "Call graph";
-  }
+  static std::string getGraphName(CallGraph *Graph) { return "Call graph"; }
 
   std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
     if (Function *Func = Node->getFunction())
@@ -38,49 +35,57 @@ struct DOTGraphTraits<CallGraph*> : public DefaultDOTGraphTraits {
   }
 };
 
+struct AnalysisCallGraphWrapperPassTraits {
+  static CallGraph *getGraph(CallGraphWrapperPass *P) {
+    return &P->getCallGraph();
+  }
+};
+
 } // end llvm namespace
 
 namespace {
 
 struct CallGraphViewer
-  : public DOTGraphTraitsModuleViewer<CallGraph, true> {
+    : public DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *,
+                                        AnalysisCallGraphWrapperPassTraits> {
   static char ID;
 
   CallGraphViewer()
-    : DOTGraphTraitsModuleViewer<CallGraph, true>("callgraph", ID) {
+      : DOTGraphTraitsModuleViewer<CallGraphWrapperPass, true, CallGraph *,
+                                   AnalysisCallGraphWrapperPassTraits>(
+            "callgraph", ID) {
     initializeCallGraphViewerPass(*PassRegistry::getPassRegistry());
   }
 };
 
-struct CallGraphPrinter
-  : public DOTGraphTraitsModulePrinter<CallGraph, true> {
+struct CallGraphPrinter : public DOTGraphTraitsModulePrinter<
+                              CallGraphWrapperPass, true, CallGraph *,
+                              AnalysisCallGraphWrapperPassTraits> {
   static char ID;
 
   CallGraphPrinter()
-    : DOTGraphTraitsModulePrinter<CallGraph, true>("callgraph", ID) {
-      initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
+      : DOTGraphTraitsModulePrinter<CallGraphWrapperPass, true, CallGraph *,
+                                    AnalysisCallGraphWrapperPassTraits>(
+            "callgraph", ID) {
+    initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry());
   }
 };
 
 } // end anonymous namespace
 
 char CallGraphViewer::ID = 0;
-INITIALIZE_PASS(CallGraphViewer, "view-callgraph",
-                "View call graph",
-                false, false)
+INITIALIZE_PASS(CallGraphViewer, "view-callgraph", "View call graph", false,
+                false)
 
 char CallGraphPrinter::ID = 0;
 INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph",
-                "Print call graph to 'dot' file",
-                false, false)
+                "Print call graph to 'dot' file", false, false)
 
 // Create methods available outside of this file, to use them
 // "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
 // the link time optimization.
 
-ModulePass *llvm::createCallGraphViewerPass() {
-  return new CallGraphViewer();
-}
+ModulePass *llvm::createCallGraphViewerPass() { return new CallGraphViewer(); }
 
 ModulePass *llvm::createCallGraphPrinterPass() {
   return new CallGraphPrinter();
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index 1c4f17d..b37344b 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -14,11 +14,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Module.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 7ec4644..f4097e4 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -24,12 +24,12 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/InstIterator.h"
 #include <set>
 using namespace llvm;
 
@@ -94,34 +94,38 @@ namespace {
       initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
     }
 
-    bool runOnModule(Module &M) {
-      InitializeAliasAnalysis(this);                 // set up super class
-      AnalyzeGlobals(M);                          // find non-addr taken globals
-      AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
+    bool runOnModule(Module &M) override {
+      InitializeAliasAnalysis(this);
+
+      // Find non-addr taken globals.
+      AnalyzeGlobals(M);
+
+      // Propagate on CG.
+      AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M);
       return false;
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AliasAnalysis::getAnalysisUsage(AU);
-      AU.addRequired<CallGraph>();
+      AU.addRequired<CallGraphWrapperPass>();
       AU.setPreservesAll();                         // Does not transform code
     }
 
     //------------------------------------------------
     // Implement the AliasAnalysis API
     //
-    AliasResult alias(const Location &LocA, const Location &LocB);
+    AliasResult alias(const Location &LocA, const Location &LocB) override;
     ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const Location &Loc);
+                               const Location &Loc) override;
     ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) {
+                               ImmutableCallSite CS2) override {
       return AliasAnalysis::getModRefInfo(CS1, CS2);
     }
 
     /// getModRefBehavior - Return the behavior of the specified function if
     /// called from the specified call site.  The call site may be null in which
     /// case the most generic behavior of this function should be returned.
-    ModRefBehavior getModRefBehavior(const Function *F) {
+    ModRefBehavior getModRefBehavior(const Function *F) override {
       ModRefBehavior Min = UnknownModRefBehavior;
 
       if (FunctionRecord *FR = getFunctionInfo(F)) {
@@ -137,7 +141,7 @@ namespace {
     /// getModRefBehavior - Return the behavior of the specified function if
     /// called from the specified call site.  The call site may be null in which
     /// case the most generic behavior of this function should be returned.
-    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
       ModRefBehavior Min = UnknownModRefBehavior;
 
       if (const Function* F = CS.getCalledFunction())
@@ -151,15 +155,15 @@ namespace {
       return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
     }
 
-    virtual void deleteValue(Value *V);
-    virtual void copyValue(Value *From, Value *To);
-    virtual void addEscapingUse(Use &U);
+    void deleteValue(Value *V) override;
+    void copyValue(Value *From, Value *To) override;
+    void addEscapingUse(Use &U) override;
 
     /// getAdjustedAnalysisPointer - This method is used when a pass implements
     /// an analysis interface through multiple inheritance.  If needed, it
     /// should override this to adjust the this pointer as needed for the
     /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+    void *getAdjustedAnalysisPointer(AnalysisID PI) override {
       if (PI == &AliasAnalysis::ID)
         return (AliasAnalysis*)this;
       return this;
@@ -189,7 +193,7 @@ char GlobalsModRef::ID = 0;
 INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
                 "globalsmodref-aa", "Simple mod/ref analysis for globals",    
                 false, true, false)
-INITIALIZE_PASS_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
                 "globalsmodref-aa", "Simple mod/ref analysis for globals",    
                 false, true, false)
@@ -248,42 +252,33 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
                                          GlobalValue *OkayStoreDest) {
   if (!V->getType()->isPointerTy()) return true;
 
-  for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) {
-    User *U = *UI;
-    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+  for (Use &U : V->uses()) {
+    User *I = U.getUser();
+    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
       Readers.push_back(LI->getParent()->getParent());
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
       if (V == SI->getOperand(1)) {
         Writers.push_back(SI->getParent()->getParent());
       } else if (SI->getOperand(1) != OkayStoreDest) {
         return true;  // Storing the pointer
       }
-    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
-      if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
-    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
-      if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
+    } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
+      if (AnalyzeUsesOfPointer(I, Readers, Writers))
         return true;
-    } else if (isFreeCall(U, TLI)) {
-      Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
-    } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
-      // Make sure that this is just the function being called, not that it is
-      // passing into the function.
-      for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
-        if (CI->getArgOperand(i) == V) return true;
-    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+    } else if (Operator::getOpcode(I) == Instruction::BitCast) {
+      if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
+        return true;
+    } else if (CallSite CS = I) {
       // Make sure that this is just the function being called, not that it is
       // passing into the function.
-      for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
-        if (II->getArgOperand(i) == V) return true;
-    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
-      if (CE->getOpcode() == Instruction::GetElementPtr ||
-          CE->getOpcode() == Instruction::BitCast) {
-        if (AnalyzeUsesOfPointer(CE, Readers, Writers))
-          return true;
-      } else {
-        return true;
+      if (!CS.isCallee(&U)) {
+        // Detect calls to free.
+        if (isFreeCall(I, TLI))
+          Writers.push_back(CS->getParent()->getParent());
+        else
+          return true; // Argument of an unknown call.
       }
-    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
       if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
         return true;  // Allow comparison against null.
     } else {
@@ -308,8 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
 
   // Walk the user list of the global.  If we find anything other than a direct
   // load or store, bail out.
-  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
-    User *U = *I;
+  for (User *U : GV->users()) {
     if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
       // The pointer loaded from the global can only be used in simple ways:
       // we allow addressing of it and loading storing to it.  We do *not* allow
@@ -363,8 +357,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
 void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
   // We do a bottom-up SCC traversal of the call graph.  In other words, we
   // visit all callees before callers (leaf-first).
-  for (scc_iterator<CallGraph*> I = scc_begin(&CG), E = scc_end(&CG); I != E;
-       ++I) {
+  for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
     std::vector<CallGraphNode *> &SCC = *I;
     assert(!SCC.empty() && "SCC with no functions?");
 
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index 47357cf..b26c052 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
 #include "llvm-c/Initialization.h"
+#include "llvm/PassRegistry.h"
 
 using namespace llvm;
 
 /// initializeIPA - Initialize all passes linked into the IPA library.
 void llvm::initializeIPA(PassRegistry &Registry) {
-  initializeCallGraphPass(Registry);
+  initializeCallGraphWrapperPassPass(Registry);
   initializeCallGraphPrinterPass(Registry);
   initializeCallGraphViewerPass(Registry);
   initializeFindUsedTypesPass(Registry);
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 3bc796e..8dafc1c 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -21,15 +21,15 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Operator.h"
-#include "llvm/InstVisitor.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -43,7 +43,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   friend class InstVisitor<CallAnalyzer, bool>;
 
   // DataLayout if available, or null.
-  const DataLayout *const TD;
+  const DataLayout *const DL;
 
   /// The TargetTransformInfo available for this compilation.
   const TargetTransformInfo &TTI;
@@ -142,9 +142,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   bool visitUnreachableInst(UnreachableInst &I);
 
 public:
-  CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
+  CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
                Function &Callee, int Threshold)
-      : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
+      : DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
         IsCallerRecursive(false), IsRecursiveCall(false),
         ExposesReturnsTwice(false), HasDynamicAlloca(false),
         ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
@@ -256,10 +256,10 @@ bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
 /// Returns false if unable to compute the offset for any reason. Respects any
 /// simplified values known during the analysis of this callsite.
 bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
-  if (!TD)
+  if (!DL)
     return false;
 
-  unsigned IntPtrWidth = TD->getPointerSizeInBits();
+  unsigned IntPtrWidth = DL->getPointerSizeInBits();
   assert(IntPtrWidth == Offset.getBitWidth());
 
   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -275,12 +275,12 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
     // Handle a struct index, which adds its field offset to the pointer.
     if (StructType *STy = dyn_cast<StructType>(*GTI)) {
       unsigned ElementIdx = OpC->getZExtValue();
-      const StructLayout *SL = TD->getStructLayout(STy);
+      const StructLayout *SL = DL->getStructLayout(STy);
       Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
       continue;
     }
 
-    APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+    APInt TypeSize(IntPtrWidth, DL->getTypeAllocSize(GTI.getIndexedType()));
     Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
   }
   return true;
@@ -293,7 +293,7 @@ bool CallAnalyzer::visitAlloca(AllocaInst &I) {
   // Accumulate the allocated size.
   if (I.isStaticAlloca()) {
     Type *Ty = I.getAllocatedType();
-    AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
+    AllocatedSize += (DL ? DL->getTypeAllocSize(Ty) :
                       Ty->getPrimitiveSizeInBits());
   }
 
@@ -330,7 +330,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
 
   // Try to fold GEPs of constant-offset call site argument pointers. This
   // requires target data and inbounds GEPs.
-  if (TD && I.isInBounds()) {
+  if (DL && I.isInBounds()) {
     // Check if we have a base + offset for the pointer.
     Value *Ptr = I.getPointerOperand();
     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
@@ -399,6 +399,7 @@ bool CallAnalyzer::visitBitCast(BitCastInst &I) {
 }
 
 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
+  const DataLayout *DL = I.getDataLayout();
   // Propagate constants through ptrtoint.
   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
   if (!COp)
@@ -412,7 +413,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   // Track base/offset pairs when converted to a plain integer provided the
   // integer is large enough to represent the pointer.
   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
-  if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+  if (DL && IntegerSize >= DL->getPointerSizeInBits()) {
     std::pair<Value *, APInt> BaseAndOffset
       = ConstantOffsetPtrs.lookup(I.getOperand(0));
     if (BaseAndOffset.first)
@@ -435,6 +436,7 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
 }
 
 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+  const DataLayout *DL = I.getDataLayout();
   // Propagate constants through ptrtoint.
   Constant *COp = dyn_cast<Constant>(I.getOperand(0));
   if (!COp)
@@ -449,7 +451,7 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   // modifications provided the integer is not too large.
   Value *Op = I.getOperand(0);
   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
-  if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+  if (DL && IntegerSize <= DL->getPointerSizeInBits()) {
     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
     if (BaseAndOffset.first)
       ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -488,7 +490,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
     COp = SimplifiedValues.lookup(Operand);
   if (COp)
     if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
-                                               COp, TD)) {
+                                               COp, DL)) {
       SimplifiedValues[&I] = C;
       return true;
     }
@@ -523,9 +525,9 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) {
   // a common base.
   Value *LHSBase, *RHSBase;
   APInt LHSOffset, RHSOffset;
-  llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
   if (LHSBase) {
-    llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
     if (RHSBase && LHSBase == RHSBase) {
       // We have common bases, fold the icmp to a constant based on the
       // offsets.
@@ -573,9 +575,9 @@ bool CallAnalyzer::visitSub(BinaryOperator &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   Value *LHSBase, *RHSBase;
   APInt LHSOffset, RHSOffset;
-  llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
   if (LHSBase) {
-    llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
     if (RHSBase && LHSBase == RHSBase) {
       // We have common bases, fold the subtract to a constant based on the
       // offsets.
@@ -602,7 +604,7 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
   if (!isa<Constant>(RHS))
     if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
       RHS = SimpleRHS;
-  Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
+  Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);
   if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
     SimplifiedValues[&I] = C;
     return true;
@@ -721,7 +723,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
     return false;
   }
   if (CS.isCall() &&
-      cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
+      cast<CallInst>(CS.getInstruction())->cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
   if (Function *F = CS.getCalledFunction()) {
@@ -784,7 +786,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
   // during devirtualization and so we want to give it a hefty bonus for
   // inlining, but cap that bonus in the event that inlining wouldn't pan
   // out. Pretend to inline the function, with a custom threshold.
-  CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold);
+  CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold);
   if (CA.analyzeCall(CS)) {
     // We were able to inline the indirect call! Subtract the cost from the
     // bonus we want to apply, but don't go below zero.
@@ -872,6 +874,16 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
 /// viable, and true if inlining remains viable.
 bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+    // FIXME: Currently, the number of instructions in a function regardless of
+    // our ability to simplify them during inline to constants or dead code,
+    // are actually used by the vector bonus heuristic. As long as that's true,
+    // we have to special case debug intrinsics here to prevent differences in
+    // inlining due to debug symbols. Eventually, the number of unsimplified
+    // instructions shouldn't factor into the cost computation, but until then,
+    // hack around it here.
+    if (isa<DbgInfoIntrinsic>(I))
+      continue;
+
     ++NumInstructions;
     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
       ++NumVectorInstructions;
@@ -921,10 +933,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
 /// no constant offsets applied.
 ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
-  if (!TD || !V->getType()->isPointerTy())
+  if (!DL || !V->getType()->isPointerTy())
     return 0;
 
-  unsigned IntPtrWidth = TD->getPointerSizeInBits();
+  unsigned IntPtrWidth = DL->getPointerSizeInBits();
   APInt Offset = APInt::getNullValue(IntPtrWidth);
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -948,7 +960,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
   } while (Visited.insert(V));
 
-  Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+  Type *IntPtrTy = DL->getIntPtrType(V->getContext());
   return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
 }
 
@@ -983,12 +995,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
   // Give out bonuses per argument, as the instructions setting them up will
   // be gone after inlining.
   for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
-    if (TD && CS.isByValArgument(I)) {
+    if (DL && CS.isByValArgument(I)) {
       // We approximate the number of loads and stores needed by dividing the
       // size of the byval type by the target's pointer size.
       PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
-      unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
-      unsigned PointerSize = TD->getPointerSizeInBits();
+      unsigned TypeSize = DL->getTypeSizeInBits(PTy->getElementType());
+      unsigned PointerSize = DL->getPointerSizeInBits();
       // Ceiling division.
       unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
 
@@ -1040,9 +1052,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
 
   Function *Caller = CS.getInstruction()->getParent()->getParent();
   // Check if the caller function is recursive itself.
-  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
-       U != E; ++U) {
-    CallSite Site(cast<Value>(*U));
+  for (User *U : Caller->users()) {
+    CallSite Site(U);
     if (!Site)
       continue;
     Instruction *I = Site.getInstruction();
@@ -1154,7 +1165,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
     }
   }
 
-  // If this is a noduplicate call, we can still inline as long as 
+  // If this is a noduplicate call, we can still inline as long as
   // inlining this would cause the removal of the caller (so the instruction
   // is not actually duplicated, just moved).
   if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
@@ -1168,7 +1179,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// \brief Dump stats about this call's analysis.
 void CallAnalyzer::dump() {
-#define DEBUG_PRINT_STAT(x) llvm::dbgs() << "      " #x ": " << x << "\n"
+#define DEBUG_PRINT_STAT(x) dbgs() << "      " #x ": " << x << "\n"
   DEBUG_PRINT_STAT(NumConstantArgs);
   DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
   DEBUG_PRINT_STAT(NumAllocaArgs);
@@ -1178,6 +1189,9 @@ void CallAnalyzer::dump() {
   DEBUG_PRINT_STAT(SROACostSavings);
   DEBUG_PRINT_STAT(SROACostSavingsLost);
   DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
+  DEBUG_PRINT_STAT(Cost);
+  DEBUG_PRINT_STAT(Threshold);
+  DEBUG_PRINT_STAT(VectorBonus);
 #undef DEBUG_PRINT_STAT
 }
 #endif
@@ -1190,7 +1204,7 @@ INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
 
 char InlineCostAnalysis::ID = 0;
 
-InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {}
+InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {}
 
 InlineCostAnalysis::~InlineCostAnalysis() {}
 
@@ -1201,7 +1215,6 @@ void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
-  TD = getAnalysisIfAvailable<DataLayout>();
   TTI = &getAnalysis<TargetTransformInfo>();
   return false;
 }
@@ -1259,7 +1272,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
   DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()
         << "...\n");
 
-  CallAnalyzer CA(TD, *TTI, *Callee, Threshold);
+  CallAnalyzer CA(Callee->getDataLayout(), *TTI, *Callee, Threshold);
   bool ShouldInline = CA.analyzeCall(CS);
 
   DEBUG(CA.dump());
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index b33e2cb..5317a47 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -15,14 +15,13 @@
 #define DEBUG_TYPE "iv-users"
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
@@ -34,7 +33,7 @@ char IVUsers::ID = 0;
 INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
                       "Induction Variable Users", false, true)
 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(IVUsers, "iv-users",
                       "Induction Variable Users", false, true)
@@ -124,14 +123,14 @@ bool IVUsers::AddUsersImpl(Instruction *I,
   // IVUsers is used by LSR which assumes that all SCEV expressions are safe to
   // pass to SCEVExpander. Expressions are not safe to expand if they represent
   // operations that are not safe to speculate, namely integer division.
-  if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, TD))
+  if (!isa<PHINode>(I) && !isSafeToSpeculativelyExecute(I, DL))
     return false;
 
   // LSR is not APInt clean, do not touch integers bigger than 64-bits.
   // Also avoid creating IVs of non-native types. For example, we don't want a
   // 64-bit IV in 32-bit code just because the loop has one 64-bit cast.
   uint64_t Width = SE->getTypeSizeInBits(I->getType());
-  if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
+  if (Width > 64 || (DL && !DL->isLegalInteger(Width)))
     return false;
 
   // Get the symbolic expression for this instruction.
@@ -143,9 +142,8 @@ bool IVUsers::AddUsersImpl(Instruction *I,
     return false;
 
   SmallPtrSet<Instruction *, 4> UniqueUsers;
-  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
-       UI != E; ++UI) {
-    Instruction *User = cast<Instruction>(*UI);
+  for (Use &U : I->uses()) {
+    Instruction *User = cast<Instruction>(U.getUser());
     if (!UniqueUsers.insert(User))
       continue;
 
@@ -158,7 +156,7 @@ bool IVUsers::AddUsersImpl(Instruction *I,
     BasicBlock *UseBB = User->getParent();
     // A phi's use is live out of its predecessor block.
     if (PHINode *PHI = dyn_cast<PHINode>(User)) {
-      unsigned OperandNo = UI.getOperandNo();
+      unsigned OperandNo = U.getOperandNo();
       unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
       UseBB = PHI->getIncomingBlock(ValNo);
     }
@@ -187,15 +185,34 @@ bool IVUsers::AddUsersImpl(Instruction *I,
 
     if (AddUserToIVUsers) {
       // Okay, we found a user that we cannot reduce.
-      IVUses.push_back(new IVStrideUse(this, User, I));
-      IVStrideUse &NewUse = IVUses.back();
+      IVStrideUse &NewUse = AddUser(User, I);
       // Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
       // The regular return value here is discarded; instead of recording
       // it, we just recompute it when we need it.
+      const SCEV *OriginalISE = ISE;
       ISE = TransformForPostIncUse(NormalizeAutodetect,
                                    ISE, User, I,
                                    NewUse.PostIncLoops,
                                    *SE, *DT);
+
+      // PostIncNormalization effectively simplifies the expression under
+      // pre-increment assumptions. Those assumptions (no wrapping) might not
+      // hold for the post-inc value. Catch such cases by making sure the
+      // transformation is invertible.
+      if (OriginalISE != ISE) {
+        const SCEV *DenormalizedISE =
+          TransformForPostIncUse(Denormalize, ISE, User, I,
+              NewUse.PostIncLoops, *SE, *DT);
+
+        // If we normalized the expression, but denormalization doesn't give the
+        // original one, discard this user.
+        if (OriginalISE != DenormalizedISE) {
+          DEBUG(dbgs() << "   DISCARDING (NORMALIZATION ISN'T INVERTIBLE): "
+                       << *ISE << '\n');
+          IVUses.pop_back();
+          return false;
+        }
+      }
       DEBUG(if (SE->getSCEV(I) != ISE)
               dbgs() << "   NORMALIZED TO: " << *ISE << '\n');
     }
@@ -224,7 +241,7 @@ IVUsers::IVUsers()
 
 void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LoopInfo>();
-  AU.addRequired<DominatorTree>();
+  AU.addRequired<DominatorTreeWrapperPass>();
   AU.addRequired<ScalarEvolution>();
   AU.setPreservesAll();
 }
@@ -233,9 +250,10 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
 
   L = l;
   LI = &getAnalysis<LoopInfo>();
-  DT = &getAnalysis<DominatorTree>();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   SE = &getAnalysis<ScalarEvolution>();
-  TD = getAnalysisIfAvailable<DataLayout>();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
 
   // Find all uses of induction variables in this loop, and categorize
   // them by stride.  Start by finding all of the PHI nodes in the header for
@@ -248,7 +266,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
 
 void IVUsers::print(raw_ostream &OS, const Module *M) const {
   OS << "IV Users for loop ";
-  WriteAsOperand(OS, L->getHeader(), false);
+  L->getHeader()->printAsOperand(OS, false);
   if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
     OS << " with backedge-taken count "
        << *SE->getBackedgeTakenCount(L);
@@ -258,13 +276,13 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
   for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
        E = IVUses.end(); UI != E; ++UI) {
     OS << "  ";
-    WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+    UI->getOperandValToReplace()->printAsOperand(OS, false);
     OS << " = " << *getReplacementExpr(*UI);
     for (PostIncLoopSet::const_iterator
          I = UI->PostIncLoops.begin(),
          E = UI->PostIncLoops.end(); I != E; ++I) {
       OS << " (post-inc with loop ";
-      WriteAsOperand(OS, (*I)->getHeader(), false);
+      (*I)->getHeader()->printAsOperand(OS, false);
       OS << ")";
     }
     OS << " in  ";
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 75a49eb..3d05556 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -15,7 +15,7 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Function.h"
-#include "llvm/InstVisitor.h"
+#include "llvm/IR/InstVisitor.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -55,12 +55,12 @@ namespace {
       initializeInstCountPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnFunction(Function &F);
+    bool runOnFunction(Function &F) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
     }
-    virtual void print(raw_ostream &O, const Module *M) const {}
+    void print(raw_ostream &O, const Module *M) const override {}
 
   };
 }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b867af1..d8d8a09 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -22,16 +22,16 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/Operator.h"
-#include "llvm/Support/ConstantRange.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/ValueHandle.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
@@ -42,12 +42,12 @@ STATISTIC(NumFactor , "Number of factorizations");
 STATISTIC(NumReassoc, "Number of reassociations");
 
 struct Query {
-  const DataLayout *TD;
+  const DataLayout *DL;
   const TargetLibraryInfo *TLI;
   const DominatorTree *DT;
 
-  Query(const DataLayout *td, const TargetLibraryInfo *tli,
-        const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {}
+  Query(const DataLayout *DL, const TargetLibraryInfo *tli,
+        const DominatorTree *dt) : DL(DL), TLI(tli), DT(dt) {}
 };
 
 static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
@@ -595,7 +595,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops,
-                                      Q.TD, Q.TLI);
+                                      Q.DL, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -651,9 +651,9 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 }
 
 Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DataLayout *DL, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
                            RecursionLimit);
 }
 
@@ -667,17 +667,17 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 /// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
 /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
 /// folding.
-static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
+static Constant *stripAndComputeConstantOffsets(const DataLayout *DL,
                                                 Value *&V,
                                                 bool AllowNonInbounds = false) {
   assert(V->getType()->getScalarType()->isPointerTy());
 
   // Without DataLayout, just be conservative for now. Theoretically, more could
   // be done in this case.
-  if (!TD)
+  if (!DL)
     return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0);
 
-  Type *IntPtrTy = TD->getIntPtrType(V->getType())->getScalarType();
+  Type *IntPtrTy = DL->getIntPtrType(V->getType())->getScalarType();
   APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth());
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -687,7 +687,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
       if ((!AllowNonInbounds && !GEP->isInBounds()) ||
-          !GEP->accumulateConstantOffset(*TD, Offset))
+          !GEP->accumulateConstantOffset(*DL, Offset))
         break;
       V = GEP->getPointerOperand();
     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -712,10 +712,10 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
 
 /// \brief Compute the constant difference between two pointer values.
 /// If the difference is not a constant, returns zero.
-static Constant *computePointerDifference(const DataLayout *TD,
+static Constant *computePointerDifference(const DataLayout *DL,
                                           Value *LHS, Value *RHS) {
-  Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
-  Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+  Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
+  Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
 
   // If LHS and RHS are not related via constant offsets to the same base
   // value, there is nothing we can do here.
@@ -737,7 +737,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
-                                      Ops, Q.TD, Q.TLI);
+                                      Ops, Q.DL, Q.TLI);
     }
 
   // X - undef -> undef
@@ -831,7 +831,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
   if (match(Op0, m_PtrToInt(m_Value(X))) &&
       match(Op1, m_PtrToInt(m_Value(Y))))
-    if (Constant *Result = computePointerDifference(Q.TD, X, Y))
+    if (Constant *Result = computePointerDifference(Q.DL, X, Y))
       return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
 
   // Mul distributes over Sub.  Try some generic simplifications based on this.
@@ -857,9 +857,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 }
 
 Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DataLayout *DL, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
                            RecursionLimit);
 }
 
@@ -871,7 +871,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(),
-                                      Ops, Q.TD, Q.TLI);
+                                      Ops, Q.DL, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -913,7 +913,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(),
-                                      Ops, Q.TD, Q.TLI);
+                                      Ops, Q.DL, Q.TLI);
     }
   }
 
@@ -951,7 +951,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(),
-                                      Ops, Q.TD, Q.TLI);
+                                      Ops, Q.DL, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -977,7 +977,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
-                                      Ops, Q.TD, Q.TLI);
+                                      Ops, Q.DL, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1035,29 +1035,29 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
 }
 
 Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
-                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DataLayout *DL, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyFAddInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
 }
 
 Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
-                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DataLayout *DL, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyFSubInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
 }
 
 Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
                               FastMathFlags FMF,
-                              const DataLayout *TD,
+                              const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyFMulInst(Op0, Op1, FMF, Query (DL, TLI, DT), RecursionLimit);
 }
 
-Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *DL,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyMulInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
@@ -1067,7 +1067,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI);
     }
   }
 
@@ -1142,10 +1142,10 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
-Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifySDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyUDivInst - Given operands for a UDiv, see if we can
@@ -1158,10 +1158,10 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
-Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyUDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
@@ -1177,10 +1177,10 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
-Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyFDivInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyRem - Given operands for an SRem or URem, see if we can
@@ -1190,7 +1190,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI);
     }
   }
 
@@ -1247,10 +1247,10 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
-Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifySRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyURemInst - Given operands for a URem, see if we can
@@ -1263,10 +1263,10 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
-Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyURemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
@@ -1282,10 +1282,37 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
   return 0;
 }
 
-Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyFRemInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
+}
+
+/// isUndefShift - Returns true if a shift by \c Amount always yields undef.
+static bool isUndefShift(Value *Amount) {
+  Constant *C = dyn_cast<Constant>(Amount);
+  if (!C)
+    return false;
+
+  // X shift by undef -> undef because it may shift by the bitwidth.
+  if (isa<UndefValue>(C))
+    return true;
+
+  // Shifting by the bitwidth or more is undefined.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+    if (CI->getValue().getLimitedValue() >=
+        CI->getType()->getScalarSizeInBits())
+      return true;
+
+  // If all lanes of a vector shift are undefined the whole shift is.
+  if (isa<ConstantVector>(C) || isa<ConstantDataVector>(C)) {
+    for (unsigned I = 0, E = C->getType()->getVectorNumElements(); I != E; ++I)
+      if (!isUndefShift(C->getAggregateElement(I)))
+        return false;
+    return true;
+  }
+
+  return false;
 }
 
 /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
@@ -1295,7 +1322,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
   if (Constant *C0 = dyn_cast<Constant>(Op0)) {
     if (Constant *C1 = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { C0, C1 };
-      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.DL, Q.TLI);
     }
   }
 
@@ -1307,15 +1334,9 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
   if (match(Op1, m_Zero()))
     return Op0;
 
-  // X shift by undef -> undef because it may shift by the bitwidth.
-  if (match(Op1, m_Undef()))
-    return Op1;
-
-  // Shifting by the bitwidth or more is undefined.
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1))
-    if (CI->getValue().getLimitedValue() >=
-        Op0->getType()->getScalarSizeInBits())
-      return UndefValue::get(Op0->getType());
+  // Fold undefined shifts.
+  if (isUndefShift(Op1))
+    return UndefValue::get(Op0->getType());
 
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
@@ -1351,9 +1372,9 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 }
 
 Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
-                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DataLayout *DL, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (DL, TLI, DT),
                            RecursionLimit);
 }
 
@@ -1382,10 +1403,10 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
 }
 
 Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
-                              const DataLayout *TD,
+                              const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+  return ::SimplifyLShrInst(Op0, Op1, isExact, Query (DL, TLI, DT),
                             RecursionLimit);
 }
 
@@ -1418,10 +1439,10 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
 }
 
 Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
-                              const DataLayout *TD,
+                              const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+  return ::SimplifyAShrInst(Op0, Op1, isExact, Query (DL, TLI, DT),
                             RecursionLimit);
 }
 
@@ -1433,7 +1454,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
-                                      Ops, Q.TD, Q.TLI);
+                                      Ops, Q.DL, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1518,10 +1539,10 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
-Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyAndInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyOrInst - Given operands for an Or, see if we can
@@ -1532,7 +1553,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
-                                      Ops, Q.TD, Q.TLI);
+                                      Ops, Q.DL, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1612,10 +1633,10 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
-Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
                             const TargetLibraryInfo *TLI,
                             const DominatorTree *DT) {
-  return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyOrInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyXorInst - Given operands for a Xor, see if we can
@@ -1626,7 +1647,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
     if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
       Constant *Ops[] = { CLHS, CRHS };
       return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
-                                      Ops, Q.TD, Q.TLI);
+                                      Ops, Q.DL, Q.TLI);
     }
 
     // Canonicalize the constant to the RHS.
@@ -1672,10 +1693,10 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
-Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *TD,
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyXorInst(Op0, Op1, Query (DL, TLI, DT), RecursionLimit);
 }
 
 static Type *GetCompareTy(Value *Op) {
@@ -1730,7 +1751,7 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
 // If the C and C++ standards are ever made sufficiently restrictive in this
 // area, it may be possible to update LLVM's semantics accordingly and reinstate
 // this optimization.
-static Constant *computePointerICmp(const DataLayout *TD,
+static Constant *computePointerICmp(const DataLayout *DL,
                                     const TargetLibraryInfo *TLI,
                                     CmpInst::Predicate Pred,
                                     Value *LHS, Value *RHS) {
@@ -1772,8 +1793,8 @@ static Constant *computePointerICmp(const DataLayout *TD,
   // numerous hazards. AliasAnalysis and its utilities rely on special rules
   // governing loads and stores which don't apply to icmps. Also, AliasAnalysis
   // doesn't need to guarantee pointer inequality when it says NoAlias.
-  Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
-  Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+  Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
+  Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
 
   // If LHS and RHS are related via constant offsets to the same base
   // value, we can replace it with an icmp which just compares the offsets.
@@ -1817,8 +1838,8 @@ static Constant *computePointerICmp(const DataLayout *TD,
       ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
       uint64_t LHSSize, RHSSize;
       if (LHSOffsetCI && RHSOffsetCI &&
-          getObjectSize(LHS, LHSSize, TD, TLI) &&
-          getObjectSize(RHS, RHSSize, TD, TLI)) {
+          getObjectSize(LHS, LHSSize, DL, TLI) &&
+          getObjectSize(RHS, RHSSize, DL, TLI)) {
         const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
         const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
         if (!LHSOffsetValue.isNegative() &&
@@ -1844,8 +1865,8 @@ static Constant *computePointerICmp(const DataLayout *TD,
     // equality comparisons concerning the result. We avoid walking the whole
     // chain again by starting where the last calls to
     // stripAndComputeConstantOffsets left off and accumulate the offsets.
-    Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true);
-    Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true);
+    Constant *LHSNoBound = stripAndComputeConstantOffsets(DL, LHS, true);
+    Constant *RHSNoBound = stripAndComputeConstantOffsets(DL, RHS, true);
     if (LHS == RHS)
       return ConstantExpr::getICmp(Pred,
                                    ConstantExpr::getAdd(LHSOffset, LHSNoBound),
@@ -1865,7 +1886,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 
   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
-      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
+      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
 
     // If we have a constant, make sure it is on the RHS.
     std::swap(LHS, RHS);
@@ -1929,40 +1950,40 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       return getTrue(ITy);
     case ICmpInst::ICMP_EQ:
     case ICmpInst::ICMP_ULE:
-      if (isKnownNonZero(LHS, Q.TD))
+      if (isKnownNonZero(LHS, Q.DL))
         return getFalse(ITy);
       break;
     case ICmpInst::ICMP_NE:
     case ICmpInst::ICMP_UGT:
-      if (isKnownNonZero(LHS, Q.TD))
+      if (isKnownNonZero(LHS, Q.DL))
         return getTrue(ITy);
       break;
     case ICmpInst::ICMP_SLT:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
       if (LHSKnownNegative)
         return getTrue(ITy);
       if (LHSKnownNonNegative)
         return getFalse(ITy);
       break;
     case ICmpInst::ICMP_SLE:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
       if (LHSKnownNegative)
         return getTrue(ITy);
-      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL))
         return getFalse(ITy);
       break;
     case ICmpInst::ICMP_SGE:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
       if (LHSKnownNegative)
         return getFalse(ITy);
       if (LHSKnownNonNegative)
         return getTrue(ITy);
       break;
     case ICmpInst::ICMP_SGT:
-      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL);
       if (LHSKnownNegative)
         return getFalse(ITy);
-      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL))
         return getTrue(ITy);
       break;
     }
@@ -2045,8 +2066,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 
     // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
     // if the integer type is the same size as the pointer type.
-    if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
-        Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
+    if (MaxRecurse && Q.DL && isa<PtrToIntInst>(LI) &&
+        Q.DL->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
       if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
         // Transfer the cast to the constant.
         if (Value *V = SimplifyICmpInst(Pred, SrcOp,
@@ -2266,7 +2287,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       break;
     case ICmpInst::ICMP_SGT:
     case ICmpInst::ICMP_SGE:
-      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2276,7 +2297,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       return getFalse(ITy);
     case ICmpInst::ICMP_SLT:
     case ICmpInst::ICMP_SLE:
-      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
+      ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.DL);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2295,7 +2316,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       break;
     case ICmpInst::ICMP_SGT:
     case ICmpInst::ICMP_SGE:
-      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2305,7 +2326,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       return getTrue(ITy);
     case ICmpInst::ICMP_SLT:
     case ICmpInst::ICMP_SLE:
-      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.DL);
       if (!KnownNonNegative)
         break;
       // fall-through
@@ -2548,7 +2569,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // Simplify comparisons of related pointers using a powerful, recursive
   // GEP-walk when we have target data available..
   if (LHS->getType()->isPointerTy())
-    if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS))
+    if (Constant *C = computePointerICmp(Q.DL, Q.TLI, Pred, LHS, RHS))
       return C;
 
   if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
@@ -2588,10 +2609,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 }
 
 Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                              const DataLayout *TD,
+                              const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+  return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
                             RecursionLimit);
 }
 
@@ -2604,7 +2625,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 
   if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
     if (Constant *CRHS = dyn_cast<Constant>(RHS))
-      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
+      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
 
     // If we have a constant, make sure it is on the RHS.
     std::swap(LHS, RHS);
@@ -2685,10 +2706,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 }
 
 Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                              const DataLayout *TD,
+                              const DataLayout *DL,
                               const TargetLibraryInfo *TLI,
                               const DominatorTree *DT) {
-  return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+  return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
                             RecursionLimit);
 }
 
@@ -2699,8 +2720,12 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
                                  unsigned MaxRecurse) {
   // select true, X, Y  -> X
   // select false, X, Y -> Y
-  if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
-    return CB->getZExtValue() ? TrueVal : FalseVal;
+  if (Constant *CB = dyn_cast<Constant>(CondVal)) {
+    if (CB->isAllOnesValue())
+      return TrueVal;
+    if (CB->isNullValue())
+      return FalseVal;
+  }
 
   // select C, X, X -> X
   if (TrueVal == FalseVal)
@@ -2720,10 +2745,10 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
 }
 
 Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
-                                const DataLayout *TD,
+                                const DataLayout *DL,
                                 const TargetLibraryInfo *TLI,
                                 const DominatorTree *DT) {
-  return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT),
+  return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (DL, TLI, DT),
                               RecursionLimit);
 }
 
@@ -2731,10 +2756,7 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
 /// fold the result.  If not, this returns null.
 static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
   // The type of the GEP pointer operand.
-  PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType());
-  // The GEP pointer operand is not a pointer, it's a vector of pointers.
-  if (!PtrTy)
-    return 0;
+  PointerType *PtrTy = cast<PointerType>(Ops[0]->getType()->getScalarType());
 
   // getelementptr P -> P.
   if (Ops.size() == 1)
@@ -2744,18 +2766,19 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
     // Compute the (pointer) type returned by the GEP instruction.
     Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, Ops.slice(1));
     Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+    if (VectorType *VT = dyn_cast<VectorType>(Ops[0]->getType()))
+      GEPTy = VectorType::get(GEPTy, VT->getNumElements());
     return UndefValue::get(GEPTy);
   }
 
   if (Ops.size() == 2) {
     // getelementptr P, 0 -> P.
-    if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
-      if (C->isZero())
-        return Ops[0];
+    if (match(Ops[1], m_Zero()))
+      return Ops[0];
     // getelementptr P, N -> P if P points to a type of zero size.
-    if (Q.TD) {
+    if (Q.DL) {
       Type *Ty = PtrTy->getElementType();
-      if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0)
+      if (Ty->isSized() && Q.DL->getTypeAllocSize(Ty) == 0)
         return Ops[0];
     }
   }
@@ -2768,10 +2791,10 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
   return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
 }
 
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *TD,
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout *DL,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyGEPInst(Ops, Query (DL, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
@@ -2805,10 +2828,10 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
 
 Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
                                      ArrayRef<unsigned> Idxs,
-                                     const DataLayout *TD,
+                                     const DataLayout *DL,
                                      const TargetLibraryInfo *TLI,
                                      const DominatorTree *DT) {
-  return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT),
+  return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (DL, TLI, DT),
                                    RecursionLimit);
 }
 
@@ -2848,15 +2871,15 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
 
 static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
   if (Constant *C = dyn_cast<Constant>(Op))
-    return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI);
+    return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.DL, Q.TLI);
 
   return 0;
 }
 
-Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *TD,
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
                                const TargetLibraryInfo *TLI,
                                const DominatorTree *DT) {
-  return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyTruncInst(Op, Ty, Query (DL, TLI, DT), RecursionLimit);
 }
 
 //=== Helper functions for higher up the class hierarchy.
@@ -2901,7 +2924,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
     if (Constant *CLHS = dyn_cast<Constant>(LHS))
       if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
         Constant *COps[] = {CLHS, CRHS};
-        return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD,
+        return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.DL,
                                         Q.TLI);
       }
 
@@ -2927,9 +2950,9 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
 }
 
 Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                           const DataLayout *TD, const TargetLibraryInfo *TLI,
+                           const DataLayout *DL, const TargetLibraryInfo *TLI,
                            const DominatorTree *DT) {
-  return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit);
+  return ::SimplifyBinOp(Opcode, LHS, RHS, Query (DL, TLI, DT), RecursionLimit);
 }
 
 /// SimplifyCmpInst - Given operands for a CmpInst, see if we can
@@ -2942,9 +2965,9 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
 }
 
 Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
-                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DataLayout *DL, const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
-  return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+  return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (DL, TLI, DT),
                            RecursionLimit);
 }
 
@@ -3017,136 +3040,136 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
 }
 
 Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin,
-                          User::op_iterator ArgEnd, const DataLayout *TD,
+                          User::op_iterator ArgEnd, const DataLayout *DL,
                           const TargetLibraryInfo *TLI,
                           const DominatorTree *DT) {
-  return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(TD, TLI, DT),
+  return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(DL, TLI, DT),
                         RecursionLimit);
 }
 
 Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
-                          const DataLayout *TD, const TargetLibraryInfo *TLI,
+                          const DataLayout *DL, const TargetLibraryInfo *TLI,
                           const DominatorTree *DT) {
-  return ::SimplifyCall(V, Args.begin(), Args.end(), Query(TD, TLI, DT),
+  return ::SimplifyCall(V, Args.begin(), Args.end(), Query(DL, TLI, DT),
                         RecursionLimit);
 }
 
 /// SimplifyInstruction - See if we can compute a simplified version of this
 /// instruction.  If not, this returns null.
-Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD,
+Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *DL,
                                  const TargetLibraryInfo *TLI,
                                  const DominatorTree *DT) {
   Value *Result;
 
   switch (I->getOpcode()) {
   default:
-    Result = ConstantFoldInstruction(I, TD, TLI);
+    Result = ConstantFoldInstruction(I, DL, TLI);
     break;
   case Instruction::FAdd:
     Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
-                              I->getFastMathFlags(), TD, TLI, DT);
+                              I->getFastMathFlags(), DL, TLI, DT);
     break;
   case Instruction::Add:
     Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
                              cast<BinaryOperator>(I)->hasNoSignedWrap(),
                              cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
-                             TD, TLI, DT);
+                             DL, TLI, DT);
     break;
   case Instruction::FSub:
     Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
-                              I->getFastMathFlags(), TD, TLI, DT);
+                              I->getFastMathFlags(), DL, TLI, DT);
     break;
   case Instruction::Sub:
     Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
                              cast<BinaryOperator>(I)->hasNoSignedWrap(),
                              cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
-                             TD, TLI, DT);
+                             DL, TLI, DT);
     break;
   case Instruction::FMul:
     Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
-                              I->getFastMathFlags(), TD, TLI, DT);
+                              I->getFastMathFlags(), DL, TLI, DT);
     break;
   case Instruction::Mul:
-    Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::SDiv:
-    Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::UDiv:
-    Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::FDiv:
-    Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::SRem:
-    Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::URem:
-    Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::FRem:
-    Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::Shl:
     Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
                              cast<BinaryOperator>(I)->hasNoSignedWrap(),
                              cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
-                             TD, TLI, DT);
+                             DL, TLI, DT);
     break;
   case Instruction::LShr:
     Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
                               cast<BinaryOperator>(I)->isExact(),
-                              TD, TLI, DT);
+                              DL, TLI, DT);
     break;
   case Instruction::AShr:
     Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
                               cast<BinaryOperator>(I)->isExact(),
-                              TD, TLI, DT);
+                              DL, TLI, DT);
     break;
   case Instruction::And:
-    Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::Or:
-    Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::Xor:
-    Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+    Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::ICmp:
     Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
-                              I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+                              I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::FCmp:
     Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
-                              I->getOperand(0), I->getOperand(1), TD, TLI, DT);
+                              I->getOperand(0), I->getOperand(1), DL, TLI, DT);
     break;
   case Instruction::Select:
     Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
-                                I->getOperand(2), TD, TLI, DT);
+                                I->getOperand(2), DL, TLI, DT);
     break;
   case Instruction::GetElementPtr: {
     SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
-    Result = SimplifyGEPInst(Ops, TD, TLI, DT);
+    Result = SimplifyGEPInst(Ops, DL, TLI, DT);
     break;
   }
   case Instruction::InsertValue: {
     InsertValueInst *IV = cast<InsertValueInst>(I);
     Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
                                      IV->getInsertedValueOperand(),
-                                     IV->getIndices(), TD, TLI, DT);
+                                     IV->getIndices(), DL, TLI, DT);
     break;
   }
   case Instruction::PHI:
-    Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT));
+    Result = SimplifyPHINode(cast<PHINode>(I), Query (DL, TLI, DT));
     break;
   case Instruction::Call: {
     CallSite CS(cast<CallInst>(I));
     Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(),
-                          TD, TLI, DT);
+                          DL, TLI, DT);
     break;
   }
   case Instruction::Trunc:
-    Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT);
+    Result = SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT);
     break;
   }
 
@@ -3168,7 +3191,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD,
 /// This routine returns 'true' only when *it* simplifies something. The passed
 /// in simplified value does not count toward this.
 static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
-                                              const DataLayout *TD,
+                                              const DataLayout *DL,
                                               const TargetLibraryInfo *TLI,
                                               const DominatorTree *DT) {
   bool Simplified = false;
@@ -3177,10 +3200,9 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
   // If we have an explicit value to collapse to, do that round of the
   // simplification loop by hand initially.
   if (SimpleV) {
-    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
-         ++UI)
-      if (*UI != I)
-        Worklist.insert(cast<Instruction>(*UI));
+    for (User *U : I->users())
+      if (U != I)
+        Worklist.insert(cast<Instruction>(U));
 
     // Replace the instruction with its simplified value.
     I->replaceAllUsesWith(SimpleV);
@@ -3198,7 +3220,7 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
     I = Worklist[Idx];
 
     // See if this instruction simplifies.
-    SimpleV = SimplifyInstruction(I, TD, TLI, DT);
+    SimpleV = SimplifyInstruction(I, DL, TLI, DT);
     if (!SimpleV)
       continue;
 
@@ -3207,9 +3229,8 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
     // Stash away all the uses of the old instruction so we can check them for
     // recursive simplifications after a RAUW. This is cheaper than checking all
     // uses of To on the recursive step in most cases.
-    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
-         ++UI)
-      Worklist.insert(cast<Instruction>(*UI));
+    for (User *U : I->users())
+      Worklist.insert(cast<Instruction>(U));
 
     // Replace the instruction with its simplified value.
     I->replaceAllUsesWith(SimpleV);
@@ -3223,17 +3244,17 @@ static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
 }
 
 bool llvm::recursivelySimplifyInstruction(Instruction *I,
-                                          const DataLayout *TD,
+                                          const DataLayout *DL,
                                           const TargetLibraryInfo *TLI,
                                           const DominatorTree *DT) {
-  return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT);
+  return replaceAndRecursivelySimplifyImpl(I, 0, DL, TLI, DT);
 }
 
 bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
-                                         const DataLayout *TD,
+                                         const DataLayout *DL,
                                          const TargetLibraryInfo *TLI,
                                          const DominatorTree *DT) {
   assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
   assert(SimpleV && "Must provide a simplified value.");
-  return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT);
+  return replaceAndRecursivelySimplifyImpl(I, SimpleV, DL, TLI, DT);
 }
diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp
index 26a0322..e3e785f 100644
--- a/lib/Analysis/Interval.cpp
+++ b/lib/Analysis/Interval.cpp
@@ -14,7 +14,7 @@
 
 #include "llvm/Analysis/Interval.h"
 #include "llvm/IR/BasicBlock.h"
-#include "llvm/Support/CFG.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
new file mode 100644
index 0000000..ea213f2
--- /dev/null
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -0,0 +1,181 @@
+//===- LazyCallGraph.cpp - Analysis of a Module's call graph --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static void findCallees(
+    SmallVectorImpl<Constant *> &Worklist, SmallPtrSetImpl<Constant *> &Visited,
+    SmallVectorImpl<PointerUnion<Function *, LazyCallGraph::Node *>> &Callees,
+    SmallPtrSetImpl<Function *> &CalleeSet) {
+  while (!Worklist.empty()) {
+    Constant *C = Worklist.pop_back_val();
+
+    if (Function *F = dyn_cast<Function>(C)) {
+      // Note that we consider *any* function with a definition to be a viable
+      // edge. Even if the function's definition is subject to replacement by
+      // some other module (say, a weak definition) there may still be
+      // optimizations which essentially speculate based on the definition and
+      // a way to check that the specific definition is in fact the one being
+      // used. For example, this could be done by moving the weak definition to
+      // a strong (internal) definition and making the weak definition be an
+      // alias. Then a test of the address of the weak function against the new
+      // strong definition's address would be an effective way to determine the
+      // safety of optimizing a direct call edge.
+      if (!F->isDeclaration() && CalleeSet.insert(F))
+        Callees.push_back(F);
+      continue;
+    }
+
+    for (Value *Op : C->operand_values())
+      if (Visited.insert(cast<Constant>(Op)))
+        Worklist.push_back(cast<Constant>(Op));
+  }
+}
+
+LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) : G(G), F(F) {
+  SmallVector<Constant *, 16> Worklist;
+  SmallPtrSet<Constant *, 16> Visited;
+  // Find all the potential callees in this function. First walk the
+  // instructions and add every operand which is a constant to the worklist.
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB)
+      for (Value *Op : I.operand_values())
+        if (Constant *C = dyn_cast<Constant>(Op))
+          if (Visited.insert(C))
+            Worklist.push_back(C);
+
+  // We've collected all the constant (and thus potentially function or
+  // function containing) operands to all of the instructions in the function.
+  // Process them (recursively) collecting every function found.
+  findCallees(Worklist, Visited, Callees, CalleeSet);
+}
+
+LazyCallGraph::Node::Node(LazyCallGraph &G, const Node &OtherN)
+    : G(G), F(OtherN.F), CalleeSet(OtherN.CalleeSet) {
+  // Loop over the other node's callees, adding the Function*s to our list
+  // directly, and recursing to add the Node*s.
+  Callees.reserve(OtherN.Callees.size());
+  for (const auto &OtherCallee : OtherN.Callees)
+    if (Function *Callee = OtherCallee.dyn_cast<Function *>())
+      Callees.push_back(Callee);
+    else
+      Callees.push_back(G.copyInto(*OtherCallee.get<Node *>()));
+}
+
+LazyCallGraph::Node::Node(LazyCallGraph &G, Node &&OtherN)
+    : G(G), F(OtherN.F), Callees(std::move(OtherN.Callees)),
+      CalleeSet(std::move(OtherN.CalleeSet)) {
+  // Loop over our Callees. They've been moved from another node, but we need
+  // to move the Node*s to live under our bump ptr allocator.
+  for (auto &Callee : Callees)
+    if (Node *ChildN = Callee.dyn_cast<Node *>())
+      Callee = G.moveInto(std::move(*ChildN));
+}
+
+LazyCallGraph::LazyCallGraph(Module &M) : M(M) {
+  for (Function &F : M)
+    if (!F.isDeclaration() && !F.hasLocalLinkage())
+      if (EntryNodeSet.insert(&F))
+        EntryNodes.push_back(&F);
+
+  // Now add entry nodes for functions reachable via initializers to globals.
+  SmallVector<Constant *, 16> Worklist;
+  SmallPtrSet<Constant *, 16> Visited;
+  for (GlobalVariable &GV : M.globals())
+    if (GV.hasInitializer())
+      if (Visited.insert(GV.getInitializer()))
+        Worklist.push_back(GV.getInitializer());
+
+  findCallees(Worklist, Visited, EntryNodes, EntryNodeSet);
+}
+
+LazyCallGraph::LazyCallGraph(const LazyCallGraph &G)
+    : M(G.M), EntryNodeSet(G.EntryNodeSet) {
+  EntryNodes.reserve(G.EntryNodes.size());
+  for (const auto &EntryNode : G.EntryNodes)
+    if (Function *Callee = EntryNode.dyn_cast<Function *>())
+      EntryNodes.push_back(Callee);
+    else
+      EntryNodes.push_back(copyInto(*EntryNode.get<Node *>()));
+}
+
+// FIXME: This would be crazy simpler if BumpPtrAllocator were movable without
+// invalidating any of the allocated memory. We should make that be the case at
+// some point and delete this.
+LazyCallGraph::LazyCallGraph(LazyCallGraph &&G)
+    : M(G.M), EntryNodes(std::move(G.EntryNodes)),
+      EntryNodeSet(std::move(G.EntryNodeSet)) {
+  // Loop over our EntryNodes. They've been moved from another graph, so we
+  // need to move the Node*s to live under our bump ptr allocator. We can just
+  // do this in-place.
+  for (auto &Entry : EntryNodes)
+    if (Node *EntryN = Entry.dyn_cast<Node *>())
+      Entry = moveInto(std::move(*EntryN));
+}
+
+LazyCallGraph::Node *LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
+  return new (MappedN = BPA.Allocate()) Node(*this, F);
+}
+
+LazyCallGraph::Node *LazyCallGraph::copyInto(const Node &OtherN) {
+  Node *&N = NodeMap[&OtherN.F];
+  if (N)
+    return N;
+
+  return new (N = BPA.Allocate()) Node(*this, OtherN);
+}
+
+LazyCallGraph::Node *LazyCallGraph::moveInto(Node &&OtherN) {
+  Node *&N = NodeMap[&OtherN.F];
+  if (N)
+    return N;
+
+  return new (N = BPA.Allocate()) Node(*this, std::move(OtherN));
+}
+
+char LazyCallGraphAnalysis::PassID;
+
+LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N,
+                       SmallPtrSetImpl<LazyCallGraph::Node *> &Printed) {
+  // Recurse depth first through the nodes.
+  for (LazyCallGraph::Node *ChildN : N)
+    if (Printed.insert(ChildN))
+      printNodes(OS, *ChildN, Printed);
+
+  OS << "  Call edges in function: " << N.getFunction().getName() << "\n";
+  for (LazyCallGraph::iterator I = N.begin(), E = N.end(); I != E; ++I)
+    OS << "    -> " << I->getFunction().getName() << "\n";
+
+  OS << "\n";
+}
+
+PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M,
+                                                ModuleAnalysisManager *AM) {
+  LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M);
+
+  OS << "Printing the call graph for module: " << M->getModuleIdentifier()
+     << "\n\n";
+
+  SmallPtrSet<LazyCallGraph::Node *, 16> Printed;
+  for (LazyCallGraph::Node *N : G)
+    if (Printed.insert(N))
+      printNodes(OS, *N, Printed);
+
+  return PreservedAnalyses::all();
+}
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index b6970af..3d6c583 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -18,15 +18,15 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/ConstantRange.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include <map>
@@ -302,9 +302,9 @@ namespace {
       
     LVIValueHandle(Value *V, LazyValueInfoCache *P)
       : CallbackVH(V), Parent(P) { }
-      
-    void deleted();
-    void allUsesReplacedWith(Value *V) {
+
+    void deleted() override;
+    void allUsesReplacedWith(Value *V) override {
       deleted();
     }
   };
@@ -1013,7 +1013,8 @@ bool LazyValueInfo::runOnFunction(Function &F) {
   if (PImpl)
     getCache(PImpl).clear();
 
-  TD = getAnalysisIfAvailable<DataLayout>();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
   TLI = &getAnalysis<TargetLibraryInfo>();
 
   // Fully lazy.
@@ -1073,7 +1074,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
   // If we know the value is a constant, evaluate the conditional.
   Constant *Res = 0;
   if (Result.isConstant()) {
-    Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD,
+    Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
                                           TLI);
     if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
       return ResCI->isZero() ? False : True;
@@ -1115,14 +1116,14 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
     if (Pred == ICmpInst::ICMP_EQ) {
       // !C1 == C -> false iff C1 == C.
       Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
-                                            Result.getNotConstant(), C, TD,
+                                            Result.getNotConstant(), C, DL,
                                             TLI);
       if (Res->isNullValue())
         return False;
     } else if (Pred == ICmpInst::ICMP_NE) {
       // !C1 != C -> true iff C1 == C.
       Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
-                                            Result.getNotConstant(), C, TD,
+                                            Result.getNotConstant(), C, DL,
                                             TLI);
       if (Res->isNullValue())
         return True;
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index ec17f47..b2182b1 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -16,7 +16,7 @@
 // those aren't comprehensive either. Second, many conditions cannot be
 // checked statically. This pass does no dynamic instrumentation, so it
 // can't check for all possible problems.
-// 
+//
 // Another limitation is that it assumes all code will be executed. A store
 // through a null pointer in a basic block which is never reached is harmless,
 // but this pass will warn about it anyway. This is the main reason why most
@@ -26,31 +26,30 @@
 // less obvious. If an optimization pass appears to be introducing a warning,
 // it may be that the optimization pass is merely exposing an existing
 // condition in the code.
-// 
+//
 // This code may be run before instcombine. In many cases, instcombine checks
 // for the same kinds of things and turns instructions with undefined behavior
 // into unreachable (or equivalent). Because of this, this pass makes some
 // effort to look through bitcasts and so on.
-// 
+//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Lint.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstVisitor.h"
 #include "llvm/IR/IntrinsicInst.h"
-#include "llvm/InstVisitor.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManager.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLibraryInfo.h"
@@ -103,7 +102,7 @@ namespace {
     Module *Mod;
     AliasAnalysis *AA;
     DominatorTree *DT;
-    DataLayout *TD;
+    const DataLayout *DL;
     TargetLibraryInfo *TLI;
 
     std::string Messages;
@@ -114,22 +113,22 @@ namespace {
       initializeLintPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnFunction(Function &F);
+    bool runOnFunction(Function &F) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
       AU.addRequired<AliasAnalysis>();
       AU.addRequired<TargetLibraryInfo>();
-      AU.addRequired<DominatorTree>();
+      AU.addRequired<DominatorTreeWrapperPass>();
     }
-    virtual void print(raw_ostream &O, const Module *M) const {}
+    void print(raw_ostream &O, const Module *M) const override {}
 
     void WriteValue(const Value *V) {
       if (!V) return;
       if (isa<Instruction>(V)) {
         MessagesStr << *V << '\n';
       } else {
-        WriteAsOperand(MessagesStr, V, true, Mod);
+        V->printAsOperand(MessagesStr, true, Mod);
         MessagesStr << '\n';
       }
     }
@@ -153,7 +152,7 @@ char Lint::ID = 0;
 INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
                       false, true)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
                     false, true)
@@ -176,8 +175,9 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
 bool Lint::runOnFunction(Function &F) {
   Mod = F.getParent();
   AA = &getAnalysis<AliasAnalysis>();
-  DT = &getAnalysis<DominatorTree>();
-  TD = getAnalysisIfAvailable<DataLayout>();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
   TLI = &getAnalysis<TargetLibraryInfo>();
   visit(F);
   dbgs() << MessagesStr.str();
@@ -248,7 +248,7 @@ void Lint::visitCallSite(CallSite CS) {
           Type *Ty =
             cast<PointerType>(Formal->getType())->getElementType();
           visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
-                               TD ? TD->getABITypeAlignment(Ty) : 0,
+                               DL ? DL->getABITypeAlignment(Ty) : 0,
                                Ty, MemRef::Read | MemRef::Write);
         }
       }
@@ -415,7 +415,7 @@ void Lint::visitMemoryReference(Instruction &I,
   // Only handles memory references that read/write something simple like an
   // alloca instruction or a global variable.
   int64_t Offset = 0;
-  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) {
+  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) {
     // OK, so the access is to a constant offset from Ptr.  Check that Ptr is
     // something we can handle and if so extract the size of this base object
     // along with its alignment.
@@ -424,21 +424,21 @@ void Lint::visitMemoryReference(Instruction &I,
 
     if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
       Type *ATy = AI->getAllocatedType();
-      if (TD && !AI->isArrayAllocation() && ATy->isSized())
-        BaseSize = TD->getTypeAllocSize(ATy);
+      if (DL && !AI->isArrayAllocation() && ATy->isSized())
+        BaseSize = DL->getTypeAllocSize(ATy);
       BaseAlign = AI->getAlignment();
-      if (TD && BaseAlign == 0 && ATy->isSized())
-        BaseAlign = TD->getABITypeAlignment(ATy);
+      if (DL && BaseAlign == 0 && ATy->isSized())
+        BaseAlign = DL->getABITypeAlignment(ATy);
     } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
       // If the global may be defined differently in another compilation unit
       // then don't warn about funky memory accesses.
       if (GV->hasDefinitiveInitializer()) {
         Type *GTy = GV->getType()->getElementType();
-        if (TD && GTy->isSized())
-          BaseSize = TD->getTypeAllocSize(GTy);
+        if (DL && GTy->isSized())
+          BaseSize = DL->getTypeAllocSize(GTy);
         BaseAlign = GV->getAlignment();
-        if (TD && BaseAlign == 0 && GTy->isSized())
-          BaseAlign = TD->getABITypeAlignment(GTy);
+        if (DL && BaseAlign == 0 && GTy->isSized())
+          BaseAlign = DL->getABITypeAlignment(GTy);
       }
     }
 
@@ -451,8 +451,8 @@ void Lint::visitMemoryReference(Instruction &I,
 
     // Accesses that say that the memory is more aligned than it is are not
     // defined.
-    if (TD && Align == 0 && Ty && Ty->isSized())
-      Align = TD->getABITypeAlignment(Ty);
+    if (DL && Align == 0 && Ty && Ty->isSized())
+      Align = DL->getABITypeAlignment(Ty);
     Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),
             "Undefined behavior: Memory reference address is misaligned", &I);
   }
@@ -504,7 +504,7 @@ void Lint::visitShl(BinaryOperator &I) {
             "Undefined result: Shift count out of range", &I);
 }
 
-static bool isZero(Value *V, DataLayout *DL) {
+static bool isZero(Value *V, const DataLayout *DL) {
   // Assume undef could be zero.
   if (isa<UndefValue>(V))
     return true;
@@ -543,22 +543,22 @@ static bool isZero(Value *V, DataLayout *DL) {
 }
 
 void Lint::visitSDiv(BinaryOperator &I) {
-  Assert1(!isZero(I.getOperand(1), TD),
+  Assert1(!isZero(I.getOperand(1), DL),
           "Undefined behavior: Division by zero", &I);
 }
 
 void Lint::visitUDiv(BinaryOperator &I) {
-  Assert1(!isZero(I.getOperand(1), TD),
+  Assert1(!isZero(I.getOperand(1), DL),
           "Undefined behavior: Division by zero", &I);
 }
 
 void Lint::visitSRem(BinaryOperator &I) {
-  Assert1(!isZero(I.getOperand(1), TD),
+  Assert1(!isZero(I.getOperand(1), DL),
           "Undefined behavior: Division by zero", &I);
 }
 
 void Lint::visitURem(BinaryOperator &I) {
-  Assert1(!isZero(I.getOperand(1), TD),
+  Assert1(!isZero(I.getOperand(1), DL),
           "Undefined behavior: Division by zero", &I);
 }
 
@@ -603,7 +603,7 @@ void Lint::visitInsertElementInst(InsertElementInst &I) {
 void Lint::visitUnreachableInst(UnreachableInst &I) {
   // This isn't undefined behavior, it's merely suspicious.
   Assert1(&I == I.getParent()->begin() ||
-          prior(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+          std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
           "Unusual: unreachable immediately preceded by instruction without "
           "side effects", &I);
 }
@@ -632,7 +632,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
   // TODO: Look through eliminable cast pairs.
   // TODO: Look through calls with unique return values.
   // TODO: Look through vector insert/extract/shuffle.
-  V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
+  V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts();
   if (LoadInst *L = dyn_cast<LoadInst>(V)) {
     BasicBlock::iterator BBI = L;
     BasicBlock *BB = L->getParent();
@@ -652,8 +652,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
       if (W != V)
         return findValueImpl(W, OffsetOk, Visited);
   } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
-    if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
-                            Type::getInt64Ty(V->getContext())))
+    if (CI->isNoopCast(DL))
       return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
   } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
     if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
@@ -666,7 +665,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
       if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
                                CE->getOperand(0)->getType(),
                                CE->getType(),
-                               TD ? TD->getIntPtrType(V->getContext()) :
+                               DL ? DL->getIntPtrType(V->getType()) :
                                     Type::getInt64Ty(V->getContext())))
         return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
     } else if (CE->getOpcode() == Instruction::ExtractValue) {
@@ -679,10 +678,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
 
   // As a last resort, try SimplifyInstruction or constant folding.
   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
-    if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT))
+    if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT))
       return findValueImpl(W, OffsetOk, Visited);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
-    if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI))
+    if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
       if (W != V)
         return findValueImpl(W, OffsetOk, Visited);
   }
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index e369633..b38672e 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -17,15 +17,14 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include <algorithm>
@@ -47,7 +46,7 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
 
 char LoopInfo::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
 
 // Loop identifier metadata name.
@@ -180,12 +179,11 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
   for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
     BasicBlock *BB = *BI;
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
-      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
-           ++UI) {
-        User *U = *UI;
-        BasicBlock *UserBB = cast<Instruction>(U)->getParent();
-        if (PHINode *P = dyn_cast<PHINode>(U))
-          UserBB = P->getIncomingBlock(UI);
+      for (Use &U : I->uses()) {
+        Instruction *UI = cast<Instruction>(U.getUser());
+        BasicBlock *UserBB = UI->getParent();
+        if (PHINode *P = dyn_cast<PHINode>(UI))
+          UserBB = P->getIncomingBlock(U);
 
         // Check the current block, as a fast-path, before checking whether
         // the use is anywhere in the loop.  Most values are used in the same
@@ -220,12 +218,12 @@ bool Loop::isSafeToClone() const {
       return false;
 
     if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
-      if (II->hasFnAttr(Attribute::NoDuplicate))
+      if (II->cannotDuplicate())
         return false;
 
     for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
       if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
-        if (CI->hasFnAttr(Attribute::NoDuplicate))
+        if (CI->cannotDuplicate())
           return false;
       }
     }
@@ -528,8 +526,8 @@ void UnloopUpdater::removeBlocksFromAncestors() {
 /// nested within unloop.
 void UnloopUpdater::updateSubloopParents() {
   while (!Unloop->empty()) {
-    Loop *Subloop = *llvm::prior(Unloop->end());
-    Unloop->removeChildLoop(llvm::prior(Unloop->end()));
+    Loop *Subloop = *std::prev(Unloop->end());
+    Unloop->removeChildLoop(std::prev(Unloop->end()));
 
     assert(SubloopParents.count(Subloop) && "DFS failed to visit subloop");
     if (Loop *Parent = SubloopParents[Subloop])
@@ -614,7 +612,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
 //
 bool LoopInfo::runOnFunction(Function &) {
   releaseMemory();
-  LI.Analyze(getAnalysis<DominatorTree>().getBase());
+  LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
   return false;
 }
 
@@ -653,7 +651,7 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
 
     // Move all of the subloops to the top-level.
     while (!Unloop->empty())
-      LI.addTopLevelLoop(Unloop->removeChildLoop(llvm::prior(Unloop->end())));
+      LI.addTopLevelLoop(Unloop->removeChildLoop(std::prev(Unloop->end())));
 
     return;
   }
@@ -705,7 +703,7 @@ void LoopInfo::verifyAnalysis() const {
 
 void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
-  AU.addRequired<DominatorTree>();
+  AU.addRequired<DominatorTreeWrapperPass>();
 }
 
 void LoopInfo::print(raw_ostream &OS, const Module*) const {
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index acf2ba6..38e753f 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -14,7 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
 using namespace llvm;
@@ -33,11 +33,11 @@ public:
   PrintLoopPass(const std::string &B, raw_ostream &o)
       : LoopPass(ID), Banner(B), Out(o) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
   }
 
-  bool runOnLoop(Loop *L, LPPassManager &) {
+  bool runOnLoop(Loop *L, LPPassManager &) override {
     Out << Banner;
     for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
          b != be;
@@ -365,3 +365,17 @@ void LoopPass::assignPassManager(PMStack &PMS,
 
   LPPM->add(this);
 }
+
+// Containing function has Attribute::OptimizeNone and transformation
+// passes should skip it.
+bool LoopPass::skipOptnoneFunction(const Loop *L) const {
+  const Function *F = L->getHeader()->getParent();
+  if (F && F->hasFnAttribute(Attribute::OptimizeNone)) {
+    // FIXME: Report this to dbgs() only once per function.
+    DEBUG(dbgs() << "Skipping pass '" << getPassName()
+          << "' in function " << F->getName() << "\n");
+    // FIXME: Delete loop from pass manager's queue?
+    return true;
+  }
+  return false;
+}
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index d26aaf1..bc1dc69 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -13,11 +13,10 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -45,17 +44,17 @@ namespace {
       initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnFunction(Function &F);
+    bool runOnFunction(Function &F) override;
 
-    void print(raw_ostream &OS, const Module * = 0) const;
+    void print(raw_ostream &OS, const Module * = 0) const override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequiredTransitive<AliasAnalysis>();
       AU.addRequiredTransitive<MemoryDependenceAnalysis>();
       AU.setPreservesAll();
     }
 
-    virtual void releaseMemory() {
+    void releaseMemory() override {
       Deps.clear();
       F = 0;
     }
@@ -68,7 +67,7 @@ namespace {
         return InstTypePair(dep.getInst(), Def);
       if (dep.isNonFuncLocal())
         return InstTypePair(dep.getInst(), NonFuncLocal);
-      assert(dep.isUnknown() && "unexptected dependence type");
+      assert(dep.isUnknown() && "unexpected dependence type");
       return InstTypePair(dep.getInst(), Unknown);
     }
     static InstTypePair getInstTypePair(const Instruction* inst, DepType type) {
@@ -177,7 +176,7 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
       OS << DepTypeStr[type];
       if (DepBB) {
         OS << " in block ";
-        WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
+        DepBB->printAsOperand(OS, /*PrintType=*/false, M);
       }
       if (DepInst) {
         OS << " from: ";
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 1db0f63..1dba323 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -261,8 +261,8 @@ PointerType *llvm::getMallocType(const CallInst *CI,
   unsigned NumOfBitCastUses = 0;
 
   // Determine if CallInst has a bitcast use.
-  for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end();
-       UI != E; )
+  for (Value::const_user_iterator UI = CI->user_begin(), E = CI->user_end();
+       UI != E;)
     if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
       MallocType = cast<PointerType>(BCI->getDestTy());
       NumOfBitCastUses++;
@@ -399,12 +399,14 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
                                                  LLVMContext &Context,
                                                  bool RoundToAlign)
 : DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
-  IntegerType *IntTy = DL->getIntPtrType(Context);
-  IntTyBits = IntTy->getBitWidth();
-  Zero = APInt::getNullValue(IntTyBits);
+  // Pointer size must be rechecked for each object visited since it could have
+  // a different address space.
 }
 
 SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
+  IntTyBits = DL->getPointerTypeSizeInBits(V->getType());
+  Zero = APInt::getNullValue(IntTyBits);
+
   V = V->stripPointerCasts();
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     // If we have already seen this instruction, bail out. Cycles can happen in
@@ -456,7 +458,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
 
 SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
   // no interprocedural analysis is done at the moment
-  if (!A.hasByValAttr()) {
+  if (!A.hasByValOrInAllocaAttr()) {
     ++ObjectVisitorArgument;
     return unknown();
   }
@@ -592,11 +594,15 @@ ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
                                                      bool RoundToAlign)
 : DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
   RoundToAlign(RoundToAlign) {
-  IntTy = DL->getIntPtrType(Context);
-  Zero = ConstantInt::get(IntTy, 0);
+  // IntTy and Zero must be set for each compute() since the address space may
+  // be different for later objects.
 }
 
 SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
+  // XXX - Are vectors of pointers possible here?
+  IntTy = cast<IntegerType>(DL->getIntPtrType(V->getType()));
+  Zero = ConstantInt::get(IntTy, 0);
+
   SizeOffsetEvalType Result = compute_(V);
 
   if (!bothKnown(Result)) {
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 84ff2ee..015ded1 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -19,18 +19,18 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PredIteratorCache.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/PredIteratorCache.h"
 using namespace llvm;
 
 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
@@ -59,7 +59,7 @@ INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
                       "Memory Dependence Analysis", false, true)
 
 MemoryDependenceAnalysis::MemoryDependenceAnalysis()
-: FunctionPass(ID), PredCache(0) {
+    : FunctionPass(ID), PredCache() {
   initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
 }
 MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
@@ -87,8 +87,11 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
 
 bool MemoryDependenceAnalysis::runOnFunction(Function &) {
   AA = &getAnalysis<AliasAnalysis>();
-  TD = getAnalysisIfAvailable<DataLayout>();
-  DT = getAnalysisIfAvailable<DominatorTree>();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
+  DominatorTreeWrapperPass *DTWP =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DT = DTWP ? &DTWP->getDomTree() : 0;
   if (!PredCache)
     PredCache.reset(new PredIteratorCache());
   return false;
@@ -256,17 +259,17 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
                                        const Value *&MemLocBase,
                                        int64_t &MemLocOffs,
                                        const LoadInst *LI,
-                                       const DataLayout *TD) {
+                                       const DataLayout *DL) {
   // If we have no target data, we can't do this.
-  if (TD == 0) return false;
+  if (DL == 0) return false;
 
   // If we haven't already computed the base/offset of MemLoc, do so now.
   if (MemLocBase == 0)
-    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD);
+    MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
 
   unsigned Size = MemoryDependenceAnalysis::
     getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size,
-                                    LI, *TD);
+                                    LI, *DL);
   return Size != 0;
 }
 
@@ -280,7 +283,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
 unsigned MemoryDependenceAnalysis::
 getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
                                 unsigned MemLocSize, const LoadInst *LI,
-                                const DataLayout &TD) {
+                                const DataLayout &DL) {
   // We can only extend simple integer loads.
   if (!isa<IntegerType>(LI->getType()) || !LI->isSimple()) return 0;
 
@@ -293,7 +296,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
   // Get the base of this load.
   int64_t LIOffs = 0;
   const Value *LIBase =
-    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD);
+    GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &DL);
 
   // If the two pointers are not based on the same pointer, we can't tell that
   // they are related.
@@ -329,7 +332,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
     // If this load size is bigger than our known alignment or would not fit
     // into a native integer register, then we fail.
     if (NewLoadByteSize > LoadAlign ||
-        !TD.fitsInLegalInteger(NewLoadByteSize*8))
+        !DL.fitsInLegalInteger(NewLoadByteSize*8))
       return 0;
 
     if (LIOffs+NewLoadByteSize > MemLocEnd &&
@@ -422,7 +425,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
           if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType()))
             if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() &&
                 isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
-                                                       MemLocOffset, LI, TD))
+                                                       MemLocOffset, LI, DL))
               return MemDepResult::getClobber(Inst);
 
           continue;
@@ -498,7 +501,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
     // need to continue scanning until the malloc call.
     const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();
     if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {
-      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
+      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL);
 
       if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr))
         return MemDepResult::getDef(Inst);
@@ -690,7 +693,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
     NonLocalDepInfo::iterator Entry =
       std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
                        NonLocalDepEntry(DirtyBB));
-    if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
+    if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
       --Entry;
 
     NonLocalDepEntry *ExistingResult = 0;
@@ -771,7 +774,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
          "Can't get pointer deps of a non-pointer!");
   Result.clear();
 
-  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
+  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL);
 
   // This is the set of blocks we've inspected, and the pointer we consider in
   // each block.  Because of critical edges, we currently bail out if querying
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 0341537..f645558 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -17,8 +17,7 @@
 
 #include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -34,12 +33,12 @@ namespace {
       initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual bool runOnModule(Module &M);
+    bool runOnModule(Module &M) override;
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
     }
-    virtual void print(raw_ostream &O, const Module *M) const;
+    void print(raw_ostream &O, const Module *M) const override;
   };
 }
 
@@ -57,31 +56,27 @@ bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
 }
 
 void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
-  for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
-       E = Finder.compile_unit_end(); I != E; ++I) {
+  for (DICompileUnit CU : Finder.compile_units()) {
     O << "Compile Unit: ";
-    DICompileUnit(*I).print(O);
+    CU.print(O);
     O << '\n';
   }
 
-  for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
-       E = Finder.subprogram_end(); I != E; ++I) {
+  for (DISubprogram S : Finder.subprograms()) {
     O << "Subprogram: ";
-    DISubprogram(*I).print(O);
+    S.print(O);
     O << '\n';
   }
 
-  for (DebugInfoFinder::iterator I = Finder.global_variable_begin(),
-       E = Finder.global_variable_end(); I != E; ++I) {
+  for (DIGlobalVariable GV : Finder.global_variables()) {
     O << "GlobalVariable: ";
-    DIGlobalVariable(*I).print(O);
+    GV.print(O);
     O << '\n';
   }
 
-  for (DebugInfoFinder::iterator I = Finder.type_begin(),
-       E = Finder.type_end(); I != E; ++I) {
+  for (DIType T : Finder.types()) {
     O << "Type: ";
-    DIType(*I).print(O);
+    T.print(O);
     O << '\n';
   }
 }
diff --git a/lib/Analysis/NOTICE b/lib/Analysis/NOTICE
deleted file mode 100644
index 1b1047c..0000000
--- a/lib/Analysis/NOTICE
+++ /dev/null
@@ -1,69 +0,0 @@
-==============================================================================
-LLVM Release License
-==============================================================================
-University of Illinois/NCSA
-Open Source License
-
-Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.
-All rights reserved.
-
-Developed by:
-
-    LLVM Team
-
-    University of Illinois at Urbana-Champaign
-
-    http://llvm.org
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal with
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimers.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimers in the
-      documentation and/or other materials provided with the distribution.
-
-    * Neither the names of the LLVM Team, University of Illinois at
-      Urbana-Champaign, nor the names of its contributors may be used to
-      endorse or promote products derived from this Software without specific
-      prior written permission.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
-SOFTWARE.
-
-==============================================================================
-Copyrights and Licenses for Third Party Software Distributed with LLVM:
-==============================================================================
-The LLVM software contains code written by third parties.  Such software will
-have its own individual LICENSE.TXT file in the directory in which it appears.
-This file will describe the copyrights, license, and restrictions which apply
-to that code.
-
-The disclaimer of warranty in the University of Illinois Open Source License
-applies to all code in the LLVM Distribution, and nothing in any of the
-other licenses gives permission to use the names of the LLVM Team or the
-University of Illinois to endorse or promote products derived from this
-Software.
-
-The following pieces of software have additional or alternate copyrights,
-licenses, and/or restrictions:
-
-Program             Directory
--------             ---------
-Autoconf            llvm/autoconf
-                    llvm/projects/ModuleMaker/autoconf
-                    llvm/projects/sample/autoconf
-CellSPU backend     llvm/lib/Target/CellSPU/README.txt
-Google Test         llvm/utils/unittest/googletest
-OpenBSD regex       llvm/lib/Support/{reg*, COPYRIGHT.regex}
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 907e962..0c119d6 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -30,48 +30,47 @@ namespace {
       initializeNoAAPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    }
+    void getAnalysisUsage(AnalysisUsage &AU) const override {}
 
-    virtual void initializePass() {
+    void initializePass() override {
       // Note: NoAA does not call InitializeAliasAnalysis because it's
       // special and does not support chaining.
-      TD = getAnalysisIfAvailable<DataLayout>();
+      DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+      DL = DLP ? &DLP->getDataLayout() : 0;
     }
 
-    virtual AliasResult alias(const Location &LocA, const Location &LocB) {
+    AliasResult alias(const Location &LocA, const Location &LocB) override {
       return MayAlias;
     }
 
-    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override {
       return UnknownModRefBehavior;
     }
-    virtual ModRefBehavior getModRefBehavior(const Function *F) {
+    ModRefBehavior getModRefBehavior(const Function *F) override {
       return UnknownModRefBehavior;
     }
 
-    virtual bool pointsToConstantMemory(const Location &Loc,
-                                        bool OrLocal) {
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override {
       return false;
     }
-    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
-                                       const Location &Loc) {
+    ModRefResult getModRefInfo(ImmutableCallSite CS,
+                               const Location &Loc) override {
       return ModRef;
     }
-    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                                       ImmutableCallSite CS2) {
+    ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                               ImmutableCallSite CS2) override {
       return ModRef;
     }
 
-    virtual void deleteValue(Value *V) {}
-    virtual void copyValue(Value *From, Value *To) {}
-    virtual void addEscapingUse(Use &U) {}
-    
+    void deleteValue(Value *V) override {}
+    void copyValue(Value *From, Value *To) override {}
+    void addEscapingUse(Use &U) override {}
+
     /// getAdjustedAnalysisPointer - This method is used when a pass implements
     /// an analysis interface through multiple inheritance.  If needed, it
     /// should override this to adjust the this pointer as needed for the
     /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(const void *ID) {
+    void *getAdjustedAnalysisPointer(const void *ID) override {
       if (ID == &AliasAnalysis::ID)
         return (AliasAnalysis*)this;
       return this;
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index e6af066..ad3685a 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/PHITransAddr.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -72,7 +72,7 @@ static bool VerifySubExpr(Value *Expr,
   // If it isn't in the InstInputs list it is a subexpr incorporated into the
   // address.  Sanity check that it is phi translatable.
   if (!CanPHITrans(I)) {
-    errs() << "Non phi translatable instruction found in PHITransAddr:\n";
+    errs() << "Instruction in PHITransAddr is not phi-translatable:\n";
     errs() << *I << '\n';
     llvm_unreachable("Either something is missing from InstInputs or "
                      "CanPHITrans is wrong.");
@@ -202,9 +202,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
 
     // Otherwise we have to see if a casted version of the incoming pointer
     // is available.  If so, we can use it, otherwise we have to fail.
-    for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
-         UI != E; ++UI) {
-      if (CastInst *CastI = dyn_cast<CastInst>(*UI))
+    for (User *U : PHIIn->users()) {
+      if (CastInst *CastI = dyn_cast<CastInst>(U))
         if (CastI->getOpcode() == Cast->getOpcode() &&
             CastI->getType() == Cast->getType() &&
             (!DT || DT->dominates(CastI->getParent(), PredBB)))
@@ -229,7 +228,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
       return GEP;
 
     // Simplify the GEP to handle 'gep x, 0' -> x etc.
-    if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) {
+    if (Value *V = SimplifyGEPInst(GEPOps, DL, TLI, DT)) {
       for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
         RemoveInstInputs(GEPOps[i], InstInputs);
 
@@ -238,9 +237,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
 
     // Scan to see if we have this GEP available.
     Value *APHIOp = GEPOps[0];
-    for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
-         UI != E; ++UI) {
-      if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
+    for (User *U : APHIOp->users()) {
+      if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
         if (GEPI->getType() == GEP->getType() &&
             GEPI->getNumOperands() == GEPOps.size() &&
             GEPI->getParent()->getParent() == CurBB->getParent() &&
@@ -285,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
         }
 
     // See if the add simplifies away.
-    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) {
+    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, DL, TLI, DT)) {
       // If we simplified the operands, the LHS is no longer an input, but Res
       // is.
       RemoveInstInputs(LHS, InstInputs);
@@ -297,9 +295,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
       return Inst;
 
     // Otherwise, see if we have this add available somewhere.
-    for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
-         UI != E; ++UI) {
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
+    for (User *U : LHS->users()) {
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U))
         if (BO->getOpcode() == Instruction::Add &&
             BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
             BO->getParent()->getParent() == CurBB->getParent() &&
@@ -372,7 +369,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
                            SmallVectorImpl<Instruction*> &NewInsts) {
   // See if we have a version of this value already available and dominating
   // PredBB.  If so, there is no need to insert a new instance of it.
-  PHITransAddr Tmp(InVal, TD);
+  PHITransAddr Tmp(InVal, DL);
   if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
     return Tmp.getAddr();
 
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 96804a0..f23833a 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -16,11 +16,10 @@
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetOperations.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/CFG.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/GenericDomTreeConstruction.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp
index 0a342b2..1b0f359 100644
--- a/lib/Analysis/PtrUseVisitor.cpp
+++ b/lib/Analysis/PtrUseVisitor.cpp
@@ -16,14 +16,13 @@
 using namespace llvm;
 
 void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
-  for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
-       UI != UE; ++UI) {
-    if (VisitedUses.insert(&UI.getUse())) {
+  for (Use &U : I.uses()) {
+    if (VisitedUses.insert(&U)) {
       UseToVisit NewU = {
-        UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown),
+        UseToVisit::UseAndIsOffsetKnownPair(&U, IsOffsetKnown),
         Offset
       };
-      Worklist.push_back(llvm_move(NewU));
+      Worklist.push_back(std::move(NewU));
     }
   }
 }
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 5635688..f4da598 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -15,10 +15,9 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <algorithm>
 #include <set>
 
@@ -215,7 +214,7 @@ std::string Region::getNameStr() const {
   if (getEntry()->getName().empty()) {
     raw_string_ostream OS(entryName);
 
-    WriteAsOperand(OS, getEntry(), false);
+    getEntry()->printAsOperand(OS, false);
   } else
     entryName = getEntry()->getName();
 
@@ -223,7 +222,7 @@ std::string Region::getNameStr() const {
     if (getExit()->getName().empty()) {
       raw_string_ostream OS(exitName);
 
-      WriteAsOperand(OS, getExit(), false);
+      getExit()->printAsOperand(OS, false);
     } else
       exitName = getExit()->getName();
   } else
@@ -439,8 +438,8 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level,
     OS.indent(level*2 + 2);
 
     if (Style == PrintBB) {
-      for (const_block_iterator I = block_begin(), E = block_end(); I != E; ++I)
-        OS << (*I)->getName() << ", "; // TODO: remove the last ","
+      for (const auto &BB : blocks())
+        OS << BB->getName() << ", "; // TODO: remove the last ","
     } else if (Style == PrintRN) {
       for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
         OS << **I << ", "; // TODO: remove the last ",
@@ -707,7 +706,7 @@ void RegionInfo::Calculate(Function &F) {
 bool RegionInfo::runOnFunction(Function &F) {
   releaseMemory();
 
-  DT = &getAnalysis<DominatorTree>();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   PDT = &getAnalysis<PostDominatorTree>();
   DF = &getAnalysis<DominanceFrontier>();
 
@@ -721,7 +720,7 @@ bool RegionInfo::runOnFunction(Function &F) {
 
 void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
-  AU.addRequiredTransitive<DominatorTree>();
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
   AU.addRequired<PostDominatorTree>();
   AU.addRequired<DominanceFrontier>();
 }
@@ -847,7 +846,7 @@ void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB)
 char RegionInfo::ID = 0;
 INITIALIZE_PASS_BEGIN(RegionInfo, "regions",
                 "Detect single entry single exit regions", true, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
 INITIALIZE_PASS_DEPENDENCY(DominanceFrontier)
 INITIALIZE_PASS_END(RegionInfo, "regions",
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 9208fa2..12d7ca3 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -189,15 +189,14 @@ public:
   PrintRegionPass(const std::string &B, raw_ostream &o)
       : RegionPass(ID), Banner(B), Out(o) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
   }
 
-  virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+  bool runOnRegion(Region *R, RGPassManager &RGM) override {
     Out << Banner;
-    for (Region::block_iterator I = R->block_begin(), E = R->block_end();
-         I != E; ++I)
-      (*I)->print(Out);
+    for (const auto &BB : R->blocks())
+      BB->print(Out);
 
     return false;
   }
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index c5f1b92..6467f47 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -121,11 +121,10 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
 
     RegionInfo *RI = R->getRegionInfo();
 
-    for (Region::const_block_iterator BI = R->block_begin(),
-         BE = R->block_end(); BI != BE; ++BI)
-      if (RI->getRegionFor(*BI) == R)
+    for (const auto &BB : R->blocks())
+      if (RI->getRegionFor(BB) == R)
         O.indent(2 * (depth + 1)) << "Node"
-          << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(*BI))
+          << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
           << ";\n";
 
     O.indent(2 * depth) << "}\n";
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 0a02f4e..08de621 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -64,26 +64,25 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLibraryInfo.h"
@@ -114,7 +113,7 @@ VerifySCEV("verify-scev",
 INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
                 "Scalar Evolution Analysis", false, true)
 INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
 INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
                 "Scalar Evolution Analysis", false, true)
@@ -136,9 +135,9 @@ void SCEV::dump() const {
 #endif
 
 void SCEV::print(raw_ostream &OS) const {
-  switch (getSCEVType()) {
+  switch (static_cast<SCEVTypes>(getSCEVType())) {
   case scConstant:
-    WriteAsOperand(OS, cast<SCEVConstant>(this)->getValue(), false);
+    cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
     return;
   case scTruncate: {
     const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this);
@@ -174,7 +173,7 @@ void SCEV::print(raw_ostream &OS) const {
     if (AR->getNoWrapFlags(FlagNW) &&
         !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)))
       OS << "nw><";
-    WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false);
+    AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false);
     OS << ">";
     return;
   }
@@ -194,7 +193,7 @@ void SCEV::print(raw_ostream &OS) const {
     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
          I != E; ++I) {
       OS << **I;
-      if (llvm::next(I) != E)
+      if (std::next(I) != E)
         OS << OpStr;
     }
     OS << ")";
@@ -229,25 +228,24 @@ void SCEV::print(raw_ostream &OS) const {
     Constant *FieldNo;
     if (U->isOffsetOf(CTy, FieldNo)) {
       OS << "offsetof(" << *CTy << ", ";
-      WriteAsOperand(OS, FieldNo, false);
+      FieldNo->printAsOperand(OS, false);
       OS << ")";
       return;
     }
 
     // Otherwise just print it normally.
-    WriteAsOperand(OS, U->getValue(), false);
+    U->getValue()->printAsOperand(OS, false);
     return;
   }
   case scCouldNotCompute:
     OS << "***COULDNOTCOMPUTE***";
     return;
-  default: break;
   }
   llvm_unreachable("Unknown SCEV kind!");
 }
 
 Type *SCEV::getType() const {
-  switch (getSCEVType()) {
+  switch (static_cast<SCEVTypes>(getSCEVType())) {
   case scConstant:
     return cast<SCEVConstant>(this)->getType();
   case scTruncate:
@@ -267,9 +265,8 @@ Type *SCEV::getType() const {
     return cast<SCEVUnknown>(this)->getType();
   case scCouldNotCompute:
     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default:
-    llvm_unreachable("Unknown SCEV kind!");
   }
+  llvm_unreachable("Unknown SCEV kind!");
 }
 
 bool SCEV::isZero() const {
@@ -322,7 +319,7 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
   return S;
 }
 
-const SCEV *ScalarEvolution::getConstant(const APInt& Val) {
+const SCEV *ScalarEvolution::getConstant(const APInt &Val) {
   return getConstant(ConstantInt::get(getContext(), Val));
 }
 
@@ -482,7 +479,7 @@ namespace {
       // Aside from the getSCEVType() ordering, the particular ordering
       // isn't very important except that it's beneficial to be consistent,
       // so that (a + b) and (b + a) don't end up as different expressions.
-      switch (LType) {
+      switch (static_cast<SCEVTypes>(LType)) {
       case scUnknown: {
         const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
         const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
@@ -619,9 +616,10 @@ namespace {
         return compare(LC->getOperand(), RC->getOperand());
       }
 
-      default:
-        llvm_unreachable("Unknown SCEV kind!");
+      case scCouldNotCompute:
+        llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
       }
+      llvm_unreachable("Unknown SCEV kind!");
     }
   };
 }
@@ -1362,7 +1360,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
 /// what it does, given a sequence of operands that would form an add
 /// expression like this:
 ///
-///    m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///    m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r)
 ///
 /// where A and B are constants, update the map with these values:
 ///
@@ -2240,6 +2238,77 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
   return S;
 }
 
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+  APInt A = C1->getValue()->getValue().abs();
+  APInt B = C2->getValue()->getValue().abs();
+  uint32_t ABW = A.getBitWidth();
+  uint32_t BBW = B.getBitWidth();
+
+  if (ABW > BBW)
+    B = B.zext(ABW);
+  else if (ABW < BBW)
+    A = A.zext(BBW);
+
+  return APIntOps::GreatestCommonDivisor(A, B);
+}
+
+/// getUDivExactExpr - Get a canonical unsigned division expression, or
+/// something simpler if possible. There is no representation for an exact udiv
+/// in SCEV IR, but we can attempt to remove factors from the LHS and RHS.
+/// We can't do this when it's not exact because the udiv may be clearing bits.
+const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,
+                                              const SCEV *RHS) {
+  // TODO: we could try to find factors in all sorts of things, but for now we
+  // just deal with u/exact (multiply, constant). See SCEVDivision towards the
+  // end of this file for inspiration.
+
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS);
+  if (!Mul)
+    return getUDivExpr(LHS, RHS);
+
+  if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {
+    // If the mulexpr multiplies by a constant, then that constant must be the
+    // first element of the mulexpr.
+    if (const SCEVConstant *LHSCst =
+            dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
+      if (LHSCst == RHSCst) {
+        SmallVector<const SCEV *, 2> Operands;
+        Operands.append(Mul->op_begin() + 1, Mul->op_end());
+        return getMulExpr(Operands);
+      }
+
+      // We can't just assume that LHSCst divides RHSCst cleanly, it could be
+      // that there's a factor provided by one of the other terms. We need to
+      // check.
+      APInt Factor = gcd(LHSCst, RHSCst);
+      if (!Factor.isIntN(1)) {
+        LHSCst = cast<SCEVConstant>(
+            getConstant(LHSCst->getValue()->getValue().udiv(Factor)));
+        RHSCst = cast<SCEVConstant>(
+            getConstant(RHSCst->getValue()->getValue().udiv(Factor)));
+        SmallVector<const SCEV *, 2> Operands;
+        Operands.push_back(LHSCst);
+        Operands.append(Mul->op_begin() + 1, Mul->op_end());
+        LHS = getMulExpr(Operands);
+        RHS = RHSCst;
+        Mul = dyn_cast<SCEVMulExpr>(LHS);
+        if (!Mul)
+          return getUDivExactExpr(LHS, RHS);
+      }
+    }
+  }
+
+  for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) {
+    if (Mul->getOperand(i) == RHS) {
+      SmallVector<const SCEV *, 2> Operands;
+      Operands.append(Mul->op_begin(), Mul->op_begin() + i);
+      Operands.append(Mul->op_begin() + i + 1, Mul->op_end());
+      return getMulExpr(Operands);
+    }
+  }
+
+  return getUDivExpr(LHS, RHS);
+}
 
 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
 /// Simplify the expression as much as possible.
@@ -2594,12 +2663,12 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
   // If we have DataLayout, we can bypass creating a target-independent
   // constant expression and then folding it back into a ConstantInt.
   // This is just a compile-time optimization.
-  if (TD)
-    return getConstant(IntTy, TD->getTypeAllocSize(AllocTy));
+  if (DL)
+    return getConstant(IntTy, DL->getTypeAllocSize(AllocTy));
 
   Constant *C = ConstantExpr::getSizeOf(AllocTy);
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
       C = Folded;
   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
   assert(Ty == IntTy && "Effective SCEV type doesn't match");
@@ -2612,14 +2681,14 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
   // If we have DataLayout, we can bypass creating a target-independent
   // constant expression and then folding it back into a ConstantInt.
   // This is just a compile-time optimization.
-  if (TD) {
+  if (DL) {
     return getConstant(IntTy,
-                       TD->getStructLayout(STy)->getElementOffset(FieldNo));
+                       DL->getStructLayout(STy)->getElementOffset(FieldNo));
   }
 
   Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
       C = Folded;
 
   Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
@@ -2667,8 +2736,8 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
   assert(isSCEVable(Ty) && "Type is not SCEVable!");
 
   // If we have a DataLayout, use it!
-  if (TD)
-    return TD->getTypeSizeInBits(Ty);
+  if (DL)
+    return DL->getTypeSizeInBits(Ty);
 
   // Integer types have fixed sizes.
   if (Ty->isIntegerTy())
@@ -2694,8 +2763,8 @@ Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
   // The only other support type is pointer.
   assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
 
-  if (TD)
-    return TD->getIntPtrType(Ty);
+  if (DL)
+    return DL->getIntPtrType(Ty);
 
   // Without DataLayout, conservatively assume pointers are 64-bit.
   return Type::getInt64Ty(getContext());
@@ -2714,7 +2783,7 @@ namespace {
     bool FindOne;
     FindInvalidSCEVUnknown() { FindOne = false; }
     bool follow(const SCEV *S) {
-      switch (S->getSCEVType()) {
+      switch (static_cast<SCEVTypes>(S->getSCEVType())) {
       case scConstant:
         return false;
       case scUnknown:
@@ -2964,9 +3033,8 @@ static void
 PushDefUseChildren(Instruction *I,
                    SmallVectorImpl<Instruction *> &Worklist) {
   // Push the def-use children onto the Worklist stack.
-  for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-       UI != UE; ++UI)
-    Worklist.push_back(cast<Instruction>(*UI));
+  for (User *U : I->users())
+    Worklist.push_back(cast<Instruction>(U));
 }
 
 /// ForgetSymbolicValue - This looks up computed SCEV values for all
@@ -3163,7 +3231,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
   // PHI's incoming blocks are in a different loop, in which case doing so
   // risks breaking LCSSA form. Instcombine would normally zap these, but
   // it doesn't have DominatorTree information, so it may miss cases.
-  if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
+  if (Value *V = SimplifyInstruction(PN, DL, TLI, DT))
     if (LI->replacementPreservesLCSSAForm(PN, V))
       return getSCEV(V);
 
@@ -3189,7 +3257,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
 
   const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
   gep_type_iterator GTI = gep_type_begin(GEP);
-  for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+  for (GetElementPtrInst::op_iterator I = std::next(GEP->op_begin()),
                                       E = GEP->op_end();
        I != E; ++I) {
     Value *Index = *I;
@@ -3434,7 +3502,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
-    ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
+    ComputeMaskedBits(U->getValue(), Zeros, Ones, DL);
     if (Ones == ~Zeros + 1)
       return setUnsignedRange(U, ConservativeResult);
     return setUnsignedRange(U,
@@ -3584,9 +3652,9 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
-    if (!U->getValue()->getType()->isIntegerTy() && !TD)
+    if (!U->getValue()->getType()->isIntegerTy() && !DL)
       return setSignedRange(U, ConservativeResult);
-    unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+    unsigned NS = ComputeNumSignBits(U->getValue(), DL);
     if (NS <= 1)
       return setSignedRange(U, ConservativeResult);
     return setSignedRange(U, ConservativeResult.intersectWith(
@@ -3690,17 +3758,24 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
       // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
       // knew about to reconstruct a low-bits mask value.
       unsigned LZ = A.countLeadingZeros();
+      unsigned TZ = A.countTrailingZeros();
       unsigned BitWidth = A.getBitWidth();
       APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
-      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
-
-      APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
-
-      if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
-        return
-          getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
-                                IntegerType::get(getContext(), BitWidth - LZ)),
-                            U->getType());
+      ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, DL);
+
+      APInt EffectiveMask =
+          APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
+      if ((LZ != 0 || TZ != 0) && !((~A & ~KnownZero) & EffectiveMask)) {
+        const SCEV *MulCount = getConstant(
+            ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, TZ)));
+        return getMulExpr(
+            getZeroExtendExpr(
+                getTruncateExpr(
+                    getUDivExactExpr(getSCEV(U->getOperand(0)), MulCount),
+                    IntegerType::get(getContext(), BitWidth - LZ - TZ)),
+                U->getType()),
+            MulCount);
+      }
     }
     break;
 
@@ -4338,6 +4413,8 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   // Examine all exits and pick the most conservative values.
   const SCEV *MaxBECount = getCouldNotCompute();
   bool CouldComputeBECount = true;
+  BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
+  const SCEV *LatchMaxCount = 0;
   SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
     ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
@@ -4354,13 +4431,17 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
       // We cannot take the "min" MaxBECount, because non-unit stride loops may
       // skip some loop tests. Taking the max over the exits is sufficiently
       // conservative.  TODO: We could do better taking into consideration
-      // that (1) the loop has unit stride (2) the last loop test is
-      // less-than/greater-than (3) any loop test is less-than/greater-than AND
-      // falls-through some constant times less then the other tests.
-      MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
+      // non-latch exits that dominate the latch.
+      if (EL.MustExit && ExitingBlocks[i] == Latch)
+        LatchMaxCount = EL.Max;
+      else
+        MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
     }
   }
-
+  // Be more precise in the easy case of a loop latch that must exit.
+  if (LatchMaxCount) {
+    MaxBECount = getUMinFromMismatchedTypes(MaxBECount, LatchMaxCount);
+  }
   return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
 }
 
@@ -4370,12 +4451,19 @@ ScalarEvolution::ExitLimit
 ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
 
   // Okay, we've chosen an exiting block.  See what condition causes us to
-  // exit at this block.
-  //
-  // FIXME: we should be able to handle switch instructions (with a single exit)
-  BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
-  if (ExitBr == 0) return getCouldNotCompute();
-  assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+  // exit at this block and remember the exit block and whether all other targets
+  // lead to the loop header.
+  bool MustExecuteLoopHeader = true;
+  BasicBlock *Exit = 0;
+  for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
+       SI != SE; ++SI)
+    if (!L->contains(*SI)) {
+      if (Exit) // Multiple exit successors.
+        return getCouldNotCompute();
+      Exit = *SI;
+    } else if (*SI != L->getHeader()) {
+      MustExecuteLoopHeader = false;
+    }
 
   // At this point, we know we have a conditional branch that determines whether
   // the loop is exited.  However, we don't know if the branch is executed each
@@ -4394,13 +4482,11 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
   //
   //  More extensive analysis could be done to handle more cases here.
   //
-  if (ExitBr->getSuccessor(0) != L->getHeader() &&
-      ExitBr->getSuccessor(1) != L->getHeader() &&
-      ExitBr->getParent() != L->getHeader()) {
+  if (!MustExecuteLoopHeader && ExitingBlock != L->getHeader()) {
     // The simple checks failed, try climbing the unique predecessor chain
     // up to the header.
     bool Ok = false;
-    for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+    for (BasicBlock *BB = ExitingBlock; BB; ) {
       BasicBlock *Pred = BB->getUniquePredecessor();
       if (!Pred)
         return getCouldNotCompute();
@@ -4424,11 +4510,20 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
       return getCouldNotCompute();
   }
 
-  // Proceed to the next level to examine the exit condition expression.
-  return ComputeExitLimitFromCond(L, ExitBr->getCondition(),
-                                  ExitBr->getSuccessor(0),
-                                  ExitBr->getSuccessor(1),
-                                  /*IsSubExpr=*/false);
+  TerminatorInst *Term = ExitingBlock->getTerminator();
+  if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
+    assert(BI->isConditional() && "If unconditional, it can't be in loop!");
+    // Proceed to the next level to examine the exit condition expression.
+    return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),
+                                    BI->getSuccessor(1),
+                                    /*IsSubExpr=*/false);
+  }
+
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(Term))
+    return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit,
+                                                /*IsSubExpr=*/false);
+
+  return getCouldNotCompute();
 }
 
 /// ComputeExitLimitFromCond - Compute the number of times the
@@ -4456,6 +4551,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
                                                IsSubExpr || EitherMayExit);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
+      bool MustExit = false;
       if (EitherMayExit) {
         // Both conditions must be true for the loop to continue executing.
         // Choose the less conservative count.
@@ -4470,6 +4566,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
           MaxBECount = EL0.Max;
         else
           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+        MustExit = EL0.MustExit || EL1.MustExit;
       } else {
         // Both conditions must be true at the same time for the loop to exit.
         // For now, be conservative.
@@ -4478,9 +4575,10 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
           MaxBECount = EL0.Max;
         if (EL0.Exact == EL1.Exact)
           BECount = EL0.Exact;
+        MustExit = EL0.MustExit && EL1.MustExit;
       }
 
-      return ExitLimit(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount, MustExit);
     }
     if (BO->getOpcode() == Instruction::Or) {
       // Recurse on the operands of the or.
@@ -4491,6 +4589,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
                                                IsSubExpr || EitherMayExit);
       const SCEV *BECount = getCouldNotCompute();
       const SCEV *MaxBECount = getCouldNotCompute();
+      bool MustExit = false;
       if (EitherMayExit) {
         // Both conditions must be false for the loop to continue executing.
         // Choose the less conservative count.
@@ -4505,6 +4604,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
           MaxBECount = EL0.Max;
         else
           MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+        MustExit = EL0.MustExit || EL1.MustExit;
       } else {
         // Both conditions must be false at the same time for the loop to exit.
         // For now, be conservative.
@@ -4513,9 +4613,10 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,
           MaxBECount = EL0.Max;
         if (EL0.Exact == EL1.Exact)
           BECount = EL0.Exact;
+        MustExit = EL0.MustExit && EL1.MustExit;
       }
 
-      return ExitLimit(BECount, MaxBECount);
+      return ExitLimit(BECount, MaxBECount, MustExit);
     }
   }
 
@@ -4639,6 +4740,30 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
   return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB));
 }
 
+ScalarEvolution::ExitLimit
+ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L,
+                                                      SwitchInst *Switch,
+                                                      BasicBlock *ExitingBlock,
+                                                      bool IsSubExpr) {
+  assert(!L->contains(ExitingBlock) && "Not an exiting block!");
+
+  // Give up if the exit is the default dest of a switch.
+  if (Switch->getDefaultDest() == ExitingBlock)
+    return getCouldNotCompute();
+
+  assert(L->contains(Switch->getDefaultDest()) &&
+         "Default case must not exit the loop!");
+  const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L);
+  const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
+
+  // while (X != Y) --> while (X-Y != 0)
+  ExitLimit EL = HowFarToZero(getMinusSCEV(LHS, RHS), L, IsSubExpr);
+  if (EL.hasAnyInfo())
+    return EL;
+
+  return getCouldNotCompute();
+}
+
 static ConstantInt *
 EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
                                 ScalarEvolution &SE) {
@@ -4830,7 +4955,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
 /// reason, return null.
 static Constant *EvaluateExpression(Value *V, const Loop *L,
                                     DenseMap<Instruction *, Constant *> &Vals,
-                                    const DataLayout *TD,
+                                    const DataLayout *DL,
                                     const TargetLibraryInfo *TLI) {
   // Convenient constant check, but redundant for recursive calls.
   if (Constant *C = dyn_cast<Constant>(V)) return C;
@@ -4857,7 +4982,7 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
       if (!Operands[i]) return 0;
       continue;
     }
-    Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
+    Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
     Vals[Operand] = C;
     if (!C) return 0;
     Operands[i] = C;
@@ -4865,12 +4990,12 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
 
   if (CmpInst *CI = dyn_cast<CmpInst>(I))
     return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
-                                           Operands[1], TD, TLI);
+                                           Operands[1], DL, TLI);
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(Operands[0], TD);
+      return ConstantFoldLoadFromConstPtr(Operands[0], DL);
   }
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, DL,
                                   TLI);
 }
 
@@ -4926,7 +5051,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
     // Compute the value of the PHIs for the next iteration.
     // EvaluateExpression adds non-phi values to the CurrentIterVals map.
     DenseMap<Instruction *, Constant *> NextIterVals;
-    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+    Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
                                            TLI);
     if (NextPHI == 0)
       return 0;        // Couldn't evaluate!
@@ -4952,7 +5077,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
       Constant *&NextPHI = NextIterVals[PHI];
       if (!NextPHI) {   // Not already computed.
         Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
-        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
       }
       if (NextPHI != I->second)
         StoppedEvolving = false;
@@ -5008,7 +5133,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
   for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
     ConstantInt *CondVal =
       dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
-                                                       TD, TLI));
+                                                       DL, TLI));
 
     // Couldn't symbolically evaluate.
     if (!CondVal) return getCouldNotCompute();
@@ -5038,7 +5163,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
       if (NextPHI) continue;    // Already computed!
 
       Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
-      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI);
     }
     CurrentIterVals.swap(NextIterVals);
   }
@@ -5082,8 +5207,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
 /// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
 /// Returns NULL if the SCEV isn't representable as a Constant.
 static Constant *BuildConstantFromSCEV(const SCEV *V) {
-  switch (V->getSCEVType()) {
-    default:  // TODO: smax, umax.
+  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
     case scCouldNotCompute:
     case scAddRecExpr:
       break;
@@ -5170,6 +5294,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
             return ConstantExpr::getUDiv(LHS, RHS);
       break;
     }
+    case scSMaxExpr:
+    case scUMaxExpr:
+      break; // TODO: smax, umax.
   }
   return 0;
 }
@@ -5241,14 +5368,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
           Constant *C = 0;
           if (const CmpInst *CI = dyn_cast<CmpInst>(I))
             C = ConstantFoldCompareInstOperands(CI->getPredicate(),
-                                                Operands[0], Operands[1], TD,
+                                                Operands[0], Operands[1], DL,
                                                 TLI);
           else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
             if (!LI->isVolatile())
-              C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
+              C = ConstantFoldLoadFromConstPtr(Operands[0], DL);
           } else
             C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
-                                         Operands, TD, TLI);
+                                         Operands, DL, TLI);
           if (!C) return V;
           return getSCEV(C);
         }
@@ -5595,7 +5722,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
     else
       MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
                                          : -CR.getUnsignedMin());
-    return ExitLimit(Distance, MaxBECount);
+    return ExitLimit(Distance, MaxBECount, /*MustExit=*/true);
   }
 
   // If the recurrence is known not to wraparound, unsigned divide computes the
@@ -5603,15 +5730,29 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
   // that the value will either become zero (and thus the loop terminates), that
   // the loop will terminate through some other exit condition first, or that
   // the loop has undefined behavior.  This means we can't "miss" the exit
-  // value, even with nonunit stride.
+  // value, even with nonunit stride, and exit later via the same branch. Note
+  // that we can skip this exit if loop later exits via a different
+  // branch. Hence MustExit=false.
   //
   // This is only valid for expressions that directly compute the loop exit. It
   // is invalid for subexpressions in which the loop may exit through this
   // branch even if this subexpression is false. In that case, the trip count
   // computed by this udiv could be smaller than the number of well-defined
   // iterations.
-  if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW))
-    return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+  if (!IsSubExpr && AddRec->getNoWrapFlags(SCEV::FlagNW)) {
+    const SCEV *Exact =
+      getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
+    return ExitLimit(Exact, Exact, /*MustExit=*/false);
+  }
+
+  // If Step is a power of two that evenly divides Start we know that the loop
+  // will always terminate.  Start may not be a constant so we just have the
+  // number of trailing zeros available.  This is safe even in presence of
+  // overflow as the recurrence will overflow to exactly 0.
+  const APInt &StepV = StepC->getValue()->getValue();
+  if (StepV.isPowerOf2() &&
+      GetMinTrailingZeros(getNegativeSCEV(Start)) >= StepV.countTrailingZeros())
+    return getUDivExactExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
 
   // Then, try to solve the above equation provided that Start is constant.
   if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
@@ -6024,7 +6165,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
   default:
     llvm_unreachable("Unexpected ICmpInst::Predicate value!");
   case ICmpInst::ICMP_SGT:
-    Pred = ICmpInst::ICMP_SLT;
     std::swap(LHS, RHS);
   case ICmpInst::ICMP_SLT: {
     ConstantRange LHSRange = getSignedRange(LHS);
@@ -6036,7 +6176,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
     break;
   }
   case ICmpInst::ICMP_SGE:
-    Pred = ICmpInst::ICMP_SLE;
     std::swap(LHS, RHS);
   case ICmpInst::ICMP_SLE: {
     ConstantRange LHSRange = getSignedRange(LHS);
@@ -6048,7 +6187,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
     break;
   }
   case ICmpInst::ICMP_UGT:
-    Pred = ICmpInst::ICMP_ULT;
     std::swap(LHS, RHS);
   case ICmpInst::ICMP_ULT: {
     ConstantRange LHSRange = getUnsignedRange(LHS);
@@ -6060,7 +6198,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
     break;
   }
   case ICmpInst::ICMP_UGE:
-    Pred = ICmpInst::ICMP_ULE;
     std::swap(LHS, RHS);
   case ICmpInst::ICMP_ULE: {
     ConstantRange LHSRange = getUnsignedRange(LHS);
@@ -6218,7 +6355,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   // LHS' type is checked for above.
   if (getTypeSizeInBits(LHS->getType()) >
       getTypeSizeInBits(FoundLHS->getType())) {
-    if (CmpInst::isSigned(Pred)) {
+    if (CmpInst::isSigned(FoundPred)) {
       FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
       FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
     } else {
@@ -6466,7 +6603,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
              : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
 
-  const SCEV *MaxBECount = getCouldNotCompute();
+  const SCEV *MaxBECount;
   if (isa<SCEVConstant>(BECount))
     MaxBECount = BECount;
   else
@@ -6476,7 +6613,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     MaxBECount = BECount;
 
-  return ExitLimit(BECount, MaxBECount);
+  return ExitLimit(BECount, MaxBECount, /*MustExit=*/true);
 }
 
 ScalarEvolution::ExitLimit
@@ -6548,7 +6685,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
   if (isa<SCEVCouldNotCompute>(MaxBECount))
     MaxBECount = BECount;
 
-  return ExitLimit(BECount, MaxBECount);
+  return ExitLimit(BECount, MaxBECount, /*MustExit=*/true);
 }
 
 /// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -6677,20 +6814,6 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
   return SE.getCouldNotCompute();
 }
 
-static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
-  APInt A = C1->getValue()->getValue().abs();
-  APInt B = C2->getValue()->getValue().abs();
-  uint32_t ABW = A.getBitWidth();
-  uint32_t BBW = B.getBitWidth();
-
-  if (ABW > BBW)
-    B = B.zext(ABW);
-  else if (ABW < BBW)
-    A = A.zext(BBW);
-
-  return APIntOps::GreatestCommonDivisor(A, B);
-}
-
 static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
   APInt A = C1->getValue()->getValue();
   APInt B = C2->getValue()->getValue();
@@ -6997,7 +7120,6 @@ public:
           Operands.push_back(Expr->getOperand(i));
       }
     } else {
-      FoundGCDTerm = false;
       const SCEV *PartialGCD = One;
       for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
         if (PartialGCD == GCD) {
@@ -7128,7 +7250,7 @@ SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
   const SCEV *Start = this->getStart();
   const SCEV *Step = this->getStepRecurrence(SE);
 
-  // Build the SCEV representation of the cannonical induction variable in the
+  // Build the SCEV representation of the canonical induction variable in the
   // loop of this SCEV.
   const SCEV *Zero = SE.getConstant(this->getType(), 0);
   const SCEV *One = SE.getConstant(this->getType(), 1);
@@ -7137,50 +7259,41 @@ SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
 
   DEBUG(dbgs() << "(delinearize: " << *this << "\n");
 
-  // Currently we fail to delinearize when the stride of this SCEV is 1. We
-  // could decide to not fail in this case: we could just return 1 for the size
-  // of the subscript, and this same SCEV for the access function.
-  if (Step == One) {
-    DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
-    return this;
-  }
+  // When the stride of this SCEV is 1, do not compute the GCD: the size of this
+  // subscript is 1, and this same SCEV for the access function.
+  const SCEV *Remainder = Zero;
+  const SCEV *GCD = One;
 
   // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
-  const SCEV *Remainder = NULL;
-  const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
+  if (Step != One && !Step->isAllOnesValue())
+    GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
 
   DEBUG(dbgs() << "GCD: " << *GCD << "\n");
   DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
 
-  // Same remark as above: we currently fail the delinearization, although we
-  // can very well handle this special case.
-  if (GCD == One) {
-    DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
-    return this;
-  }
+  const SCEV *Quotient = Start;
+  if (GCD != One && !GCD->isAllOnesValue())
+    // As findGCD computed Remainder, GCD divides "Start - Remainder." The
+    // Quotient is then this SCEV without Remainder, scaled down by the GCD.  The
+    // Quotient is what will be used in the next subscript delinearization.
+    Quotient = SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
 
-  // As findGCD computed Remainder, GCD divides "Start - Remainder." The
-  // Quotient is then this SCEV without Remainder, scaled down by the GCD.  The
-  // Quotient is what will be used in the next subscript delinearization.
-  const SCEV *Quotient =
-      SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
   DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
 
-  const SCEV *Rem;
+  const SCEV *Rem = Quotient;
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
     // Recursively call delinearize on the Quotient until there are no more
     // multiples that can be recognized.
     Rem = AR->delinearize(SE, Subscripts, Sizes);
-  else
-    Rem = Quotient;
 
-  // Scale up the cannonical induction variable IV by whatever remains from the
+  // Scale up the canonical induction variable IV by whatever remains from the
   // Step after division by the GCD: the GCD is the size of all the sub-array.
-  if (Step != GCD) {
+  if (Step != One && !Step->isAllOnesValue() && GCD != One &&
+      !GCD->isAllOnesValue() && Step != GCD) {
     Step = SCEVDivision::divide(SE, Step, GCD);
     IV = SE.getMulExpr(IV, Step);
   }
-  // The access function in the current subscript is computed as the cannonical
+  // The access function in the current subscript is computed as the canonical
   // induction variable IV (potentially scaled up by the step) and offset by
   // Rem, the offset of delinearization in the sub-array.
   const SCEV *Index = SE.getAddExpr(IV, Rem);
@@ -7225,11 +7338,8 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
   // so that future queries will recompute the expressions using the new
   // value.
   Value *Old = getValPtr();
-  SmallVector<User *, 16> Worklist;
+  SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
   SmallPtrSet<User *, 8> Visited;
-  for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
-       UI != UE; ++UI)
-    Worklist.push_back(*UI);
   while (!Worklist.empty()) {
     User *U = Worklist.pop_back_val();
     // Deleting the Old value will cause this to dangle. Postpone
@@ -7241,9 +7351,7 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
     if (PHINode *PN = dyn_cast<PHINode>(U))
       SE->ConstantEvolutionLoopExitValue.erase(PN);
     SE->ValueExprMap.erase(U);
-    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
-         UI != UE; ++UI)
-      Worklist.push_back(*UI);
+    Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
   }
   // Delete the Old value.
   if (PHINode *PN = dyn_cast<PHINode>(Old))
@@ -7267,9 +7375,10 @@ ScalarEvolution::ScalarEvolution()
 bool ScalarEvolution::runOnFunction(Function &F) {
   this->F = &F;
   LI = &getAnalysis<LoopInfo>();
-  TD = getAnalysisIfAvailable<DataLayout>();
+  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+  DL = DLP ? &DLP->getDataLayout() : 0;
   TLI = &getAnalysis<TargetLibraryInfo>();
-  DT = &getAnalysis<DominatorTree>();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   return false;
 }
 
@@ -7306,7 +7415,7 @@ void ScalarEvolution::releaseMemory() {
 void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequiredTransitive<LoopInfo>();
-  AU.addRequiredTransitive<DominatorTree>();
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
   AU.addRequired<TargetLibraryInfo>();
 }
 
@@ -7321,7 +7430,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
     PrintLoopInfo(OS, SE, *I);
 
   OS << "Loop ";
-  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
   OS << ": ";
 
   SmallVector<BasicBlock *, 8> ExitBlocks;
@@ -7337,7 +7446,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
 
   OS << "\n"
         "Loop ";
-  WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false);
+  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
   OS << ": ";
 
   if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
@@ -7359,7 +7468,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
   OS << "Classifying expressions for: ";
-  WriteAsOperand(OS, F, /*PrintType=*/false);
+  F->printAsOperand(OS, /*PrintType=*/false);
   OS << "\n";
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
     if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
@@ -7390,7 +7499,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
     }
 
   OS << "Determining loop execution counts for: ";
-  WriteAsOperand(OS, F, /*PrintType=*/false);
+  F->printAsOperand(OS, /*PrintType=*/false);
   OS << "\n";
   for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
     PrintLoopInfo(OS, &SE, *I);
@@ -7417,7 +7526,7 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
 
 ScalarEvolution::LoopDisposition
 ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
-  switch (S->getSCEVType()) {
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
   case scConstant:
     return LoopInvariant;
   case scTruncate:
@@ -7490,8 +7599,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
     return LoopInvariant;
   case scCouldNotCompute:
     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default: llvm_unreachable("Unknown SCEV kind!");
   }
+  llvm_unreachable("Unknown SCEV kind!");
 }
 
 bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
@@ -7523,7 +7632,7 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
 
 ScalarEvolution::BlockDisposition
 ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-  switch (S->getSCEVType()) {
+  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
   case scConstant:
     return ProperlyDominatesBlock;
   case scTruncate:
@@ -7580,9 +7689,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
     return ProperlyDominatesBlock;
   case scCouldNotCompute:
     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
-  default:
-    llvm_unreachable("Unknown SCEV kind!");
   }
+  llvm_unreachable("Unknown SCEV kind!");
 }
 
 bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
@@ -7637,7 +7745,7 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
 
 typedef DenseMap<const Loop *, std::string> VerifyMap;
 
-/// replaceSubString - Replaces all occurences of From in Str with To.
+/// replaceSubString - Replaces all occurrences of From in Str with To.
 static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
   size_t Pos = 0;
   while ((Pos = Str.find(From, Pos)) != std::string::npos) {
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 79c5f0d..7be6aca 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -43,16 +43,16 @@ namespace {
     /// an analysis interface through multiple inheritance.  If needed, it
     /// should override this to adjust the this pointer as needed for the
     /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+    void *getAdjustedAnalysisPointer(AnalysisID PI) override {
       if (PI == &AliasAnalysis::ID)
         return (AliasAnalysis*)this;
       return this;
     }
 
   private:
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual bool runOnFunction(Function &F);
-    virtual AliasResult alias(const Location &LocA, const Location &LocB);
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
+    bool runOnFunction(Function &F) override;
+    AliasResult alias(const Location &LocA, const Location &LocB) override;
 
     Value *GetBaseValue(const SCEV *S);
   };
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 86a557b..fb3d595 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -14,11 +14,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Support/Debug.h"
@@ -46,9 +47,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
   Instruction *Ret = NULL;
 
   // Check to see if there is already a cast!
-  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
-       UI != E; ++UI) {
-    User *U = *UI;
+  for (User *U : V->users())
     if (U->getType() == Ty)
       if (CastInst *CI = dyn_cast<CastInst>(U))
         if (CI->getOpcode() == Op) {
@@ -68,7 +67,6 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
           Ret = CI;
           break;
         }
-  }
 
   // Create a new cast.
   if (!Ret)
@@ -209,7 +207,7 @@ static bool FactorOutConstant(const SCEV *&S,
                               const SCEV *&Remainder,
                               const SCEV *Factor,
                               ScalarEvolution &SE,
-                              const DataLayout *TD) {
+                              const DataLayout *DL) {
   // Everything is divisible by one.
   if (Factor->isOne())
     return true;
@@ -249,7 +247,7 @@ static bool FactorOutConstant(const SCEV *&S,
   // In a Mul, check if there is a constant operand which is a multiple
   // of the given factor.
   if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
-    if (TD) {
+    if (DL) {
       // With DataLayout, the size is known. Check if there is a constant
       // operand which is a multiple of the given factor. If so, we can
       // factor it.
@@ -269,7 +267,7 @@ static bool FactorOutConstant(const SCEV *&S,
       for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
         const SCEV *SOp = M->getOperand(i);
         const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
-        if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
+        if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) &&
             Remainder->isZero()) {
           SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
           NewMulOps[i] = SOp;
@@ -284,12 +282,12 @@ static bool FactorOutConstant(const SCEV *&S,
   if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
     const SCEV *Step = A->getStepRecurrence(SE);
     const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
-    if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
+    if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
       return false;
     if (!StepRem->isZero())
       return false;
     const SCEV *Start = A->getStart();
-    if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
+    if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
       return false;
     S = SE.getAddRecExpr(Start, Step, A->getLoop(),
                          A->getNoWrapFlags(SCEV::FlagNW));
@@ -403,8 +401,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   // without the other.
   SplitAddRecs(Ops, Ty, SE);
 
-  Type *IntPtrTy = SE.TD
-                 ? SE.TD->getIntPtrType(PTy)
+  Type *IntPtrTy = SE.DL
+                 ? SE.DL->getIntPtrType(PTy)
                  : Type::getInt64Ty(PTy->getContext());
 
   // Descend down the pointer's type and attempt to convert the other
@@ -423,7 +421,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
         for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
           const SCEV *Op = Ops[i];
           const SCEV *Remainder = SE.getConstant(Ty, 0);
-          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
+          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) {
             // Op now has ElSize factored out.
             ScaledOps.push_back(Op);
             if (!Remainder->isZero())
@@ -457,13 +455,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
       bool FoundFieldNo = false;
       // An empty struct has no fields.
       if (STy->getNumElements() == 0) break;
-      if (SE.TD) {
+      if (SE.DL) {
         // With DataLayout, field offsets are known. See if a constant offset
         // falls within any of the struct fields.
         if (Ops.empty()) break;
         if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
           if (SE.getTypeSizeInBits(C->getType()) <= 64) {
-            const StructLayout &SL = *SE.TD->getStructLayout(STy);
+            const StructLayout &SL = *SE.DL->getStructLayout(STy);
             uint64_t FullOffset = C->getValue()->getZExtValue();
             if (FullOffset < SL.getSizeInBytes()) {
               unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
@@ -1016,6 +1014,54 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
   return IncV;
 }
 
+/// \brief Hoist the addrec instruction chain rooted in the loop phi above the
+/// position. This routine assumes that this is possible (has been checked).
+static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
+                           Instruction *Pos, PHINode *LoopPhi) {
+  do {
+    if (DT->dominates(InstToHoist, Pos))
+      break;
+    // Make sure the increment is where we want it. But don't move it
+    // down past a potential existing post-inc user.
+    InstToHoist->moveBefore(Pos);
+    Pos = InstToHoist;
+    InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
+  } while (InstToHoist != LoopPhi);
+}
+
+/// \brief Check whether we can cheaply express the requested SCEV in terms of
+/// the available PHI SCEV by truncation and/or invertion of the step.
+static bool canBeCheaplyTransformed(ScalarEvolution &SE,
+                                    const SCEVAddRecExpr *Phi,
+                                    const SCEVAddRecExpr *Requested,
+                                    bool &InvertStep) {
+  Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
+  Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
+
+  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
+    return false;
+
+  // Try truncate it if necessary.
+  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
+  if (!Phi)
+    return false;
+
+  // Check whether truncation will help.
+  if (Phi == Requested) {
+    InvertStep = false;
+    return true;
+  }
+
+  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
+  if (SE.getAddExpr(Requested->getStart(),
+                    SE.getNegativeSCEV(Requested)) == Phi) {
+    InvertStep = true;
+    return true;
+  }
+
+  return false;
+}
+
 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
 /// the base addrec, which is the addrec without any non-loop-dominating
 /// values, and return the PHI.
@@ -1023,49 +1069,87 @@ PHINode *
 SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                         const Loop *L,
                                         Type *ExpandTy,
-                                        Type *IntTy) {
+                                        Type *IntTy,
+                                        Type *&TruncTy,
+                                        bool &InvertStep) {
   assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
 
   // Reuse a previously-inserted PHI, if present.
   BasicBlock *LatchBlock = L->getLoopLatch();
   if (LatchBlock) {
+    PHINode *AddRecPhiMatch = 0;
+    Instruction *IncV = 0;
+    TruncTy = 0;
+    InvertStep = false;
+
+    // Only try partially matching scevs that need truncation and/or
+    // step-inversion if we know this loop is outside the current loop.
+    bool TryNonMatchingSCEV = IVIncInsertLoop &&
+      SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
+
     for (BasicBlock::iterator I = L->getHeader()->begin();
          PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-      if (!SE.isSCEVable(PN->getType()) ||
-          (SE.getEffectiveSCEVType(PN->getType()) !=
-           SE.getEffectiveSCEVType(Normalized->getType())) ||
-          SE.getSCEV(PN) != Normalized)
+      if (!SE.isSCEVable(PN->getType()))
+        continue;
+
+      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
+      if (!PhiSCEV)
         continue;
 
-      Instruction *IncV =
-        cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
+      bool IsMatchingSCEV = PhiSCEV == Normalized;
+      // We only handle truncation and inversion of phi recurrences for the
+      // expanded expression if the expanded expression's loop dominates the
+      // loop we insert to. Check now, so we can bail out early.
+      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
+          continue;
+
+      Instruction *TempIncV =
+          cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
 
+      // Check whether we can reuse this PHI node.
       if (LSRMode) {
-        if (!isExpandedAddRecExprPHI(PN, IncV, L))
+        if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
           continue;
-        if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+        if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
           continue;
-      }
-      else {
-        if (!isNormalAddRecExprPHI(PN, IncV, L))
+      } else {
+        if (!isNormalAddRecExprPHI(PN, TempIncV, L))
           continue;
-        if (L == IVIncInsertLoop)
-          do {
-            if (SE.DT->dominates(IncV, IVIncInsertPos))
-              break;
-            // Make sure the increment is where we want it. But don't move it
-            // down past a potential existing post-inc user.
-            IncV->moveBefore(IVIncInsertPos);
-            IVIncInsertPos = IncV;
-            IncV = cast<Instruction>(IncV->getOperand(0));
-          } while (IncV != PN);
       }
+
+      // Stop if we have found an exact match SCEV.
+      if (IsMatchingSCEV) {
+        IncV = TempIncV;
+        TruncTy = 0;
+        InvertStep = false;
+        AddRecPhiMatch = PN;
+        break;
+      }
+
+      // Try whether the phi can be translated into the requested form
+      // (truncated and/or offset by a constant).
+      if ((!TruncTy || InvertStep) &&
+          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
+        // Record the phi node. But don't stop we might find an exact match
+        // later.
+        AddRecPhiMatch = PN;
+        IncV = TempIncV;
+        TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
+      }
+    }
+
+    if (AddRecPhiMatch) {
+      // Potentially, move the increment. We have made sure in
+      // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
+      if (L == IVIncInsertLoop)
+        hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
+
       // Ok, the add recurrence looks usable.
       // Remember this PHI, even in post-inc mode.
-      InsertedValues.insert(PN);
+      InsertedValues.insert(AddRecPhiMatch);
       // Remember the increment.
       rememberInstruction(IncV);
-      return PN;
+      return AddRecPhiMatch;
     }
   }
 
@@ -1190,7 +1274,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
   // Expand the core addrec. If we need post-loop scaling, force it to
   // expand to an integer type to avoid the need for additional casting.
   Type *ExpandTy = PostLoopScale ? IntTy : STy;
-  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
+  // In some cases, we decide to reuse an existing phi node but need to truncate
+  // it and/or invert the step.
+  Type *TruncTy = 0;
+  bool InvertStep = false;
+  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
+                                          TruncTy, InvertStep);
 
   // Accommodate post-inc mode, if necessary.
   Value *Result;
@@ -1231,6 +1320,26 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
     }
   }
 
+  // We have decided to reuse an induction variable of a dominating loop. Apply
+  // truncation and/or invertion of the step.
+  if (TruncTy) {
+    Type *ResTy = Result->getType();
+    // Normalize the result type.
+    if (ResTy != SE.getEffectiveSCEVType(ResTy))
+      Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
+    // Truncate the result.
+    if (TruncTy != Result->getType()) {
+      Result = Builder.CreateTrunc(Result, TruncTy);
+      rememberInstruction(Result);
+    }
+    // Invert the result.
+    if (InvertStep) {
+      Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
+                                 Result);
+      rememberInstruction(Result);
+    }
+  }
+
   // Re-apply any non-loop-dominating scale.
   if (PostLoopScale) {
     assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
@@ -1279,7 +1388,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
                                        S->getNoWrapFlags(SCEV::FlagNW)));
     BasicBlock::iterator NewInsertPt =
-      llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+      std::next(BasicBlock::iterator(cast<Instruction>(V)));
     BuilderType::InsertPointGuard Guard(Builder);
     while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
            isa<LandingPadInst>(NewInsertPt))
@@ -1507,7 +1616,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
       while (InsertPt != Builder.GetInsertPoint()
              && (isInsertedInstruction(InsertPt)
                  || isa<DbgInfoIntrinsic>(InsertPt))) {
-        InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+        InsertPt = std::next(BasicBlock::iterator(InsertPt));
       }
       break;
     }
@@ -1528,7 +1637,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
   //
   // This is independent of PostIncLoops. The mapped value simply materializes
   // the expression at this insertion point. If the mapped value happened to be
-  // a postinc expansion, it could be reused by a non postinc user, but only if
+  // a postinc expansion, it could be reused by a non-postinc user, but only if
   // its insertion point was already at the head of the loop.
   InsertedExpressions[std::make_pair(S, InsertPt)] = V;
   return V;
@@ -1562,15 +1671,6 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
   return V;
 }
 
-/// Sort values by integer width for replaceCongruentIVs.
-static bool width_descending(Value *lhs, Value *rhs) {
-  // Put pointers at the back and make sure pointer < pointer = false.
-  if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy())
-    return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy();
-  return rhs->getType()->getPrimitiveSizeInBits()
-    < lhs->getType()->getPrimitiveSizeInBits();
-}
-
 /// replaceCongruentIVs - Check for congruent phis in this loop header and
 /// replace them with their most canonical representative. Return the number of
 /// phis eliminated.
@@ -1587,7 +1687,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
     Phis.push_back(Phi);
   }
   if (TTI)
-    std::sort(Phis.begin(), Phis.end(), width_descending);
+    std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {
+      // Put pointers at the back and make sure pointer < pointer = false.
+      if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
+        return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
+      return RHS->getType()->getPrimitiveSizeInBits() <
+             LHS->getType()->getPrimitiveSizeInBits();
+    });
 
   unsigned NumElim = 0;
   DenseMap<const SCEV *, PHINode *> ExprToIVMap;
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index f110616..1e4c0bd 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -12,7 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolutionNormalization.h"
@@ -131,7 +131,10 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
       // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
       if (AR->isAffine() &&
           IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
-        Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
+        const SCEV *TransformedStep =
+          TransformSubExpr(AR->getStepRecurrence(SE),
+                           User, OperandValToReplace);
+        Result = SE.getMinusSCEV(Result, TransformedStep);
         Loops.insert(L);
       }
 #if 0
@@ -144,6 +147,20 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
 #endif
       break;
     case Normalize:
+      // We want to normalize step expression, because otherwise we might not be
+      // able to denormalize to the original expression.
+      //
+      // Here is an example what will happen if we don't normalize step:
+      //  ORIGINAL ISE:
+      //    {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25>
+      //  NORMALIZED ISE:
+      //    {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+,
+      //     (100 /u {0,+,1}<%bb16>)}<%bb25>
+      //  DENORMALIZED BACK ISE:
+      //    {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+,
+      //     (100 /u {1,+,1}<%bb16>)}<%bb25>
+      //  Note that the initial value changes after normalization +
+      //  denormalization, which isn't correct.
       if (Loops.count(L)) {
         const SCEV *TransformedStep =
           TransformSubExpr(AR->getStepRecurrence(SE),
@@ -157,8 +174,14 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
 #endif
       break;
     case Denormalize:
-      if (Loops.count(L))
-        Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
+      // Here we want to normalize step expressions for the same reasons, as
+      // stated above.
+      if (Loops.count(L)) {
+        const SCEV *TransformedStep =
+          TransformSubExpr(AR->getStepRecurrence(SE),
+                           User, OperandValToReplace);
+        Result = SE.getAddExpr(Result, TransformedStep);
+      }
       break;
     }
     return Result;
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index 15b7872..87a4fa4 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -303,11 +303,10 @@ void SparseSolver::Solve(Function &F) {
 
       // "I" got into the work list because it made a transition.  See if any
       // users are both live and in need of updating.
-      for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
-           UI != E; ++UI) {
-        Instruction *U = cast<Instruction>(*UI);
-        if (BBExecutable.count(U->getParent()))   // Inst is executable?
-          visitInst(*U);
+      for (User *U : I->users()) {
+        Instruction *UI = cast<Instruction>(U);
+        if (BBExecutable.count(UI->getParent()))   // Inst is executable?
+          visitInst(*UI);
       }
     }
 
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 0353295..04d09f1 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -9,12 +9,12 @@
 
 #define DEBUG_TYPE "tti"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/CallSite.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Operator.h"
 #include "llvm/IR/Instruction.h"
-#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
@@ -35,16 +35,6 @@ void TargetTransformInfo::pushTTIStack(Pass *P) {
     PTTI->TopTTI = this;
 }
 
-void TargetTransformInfo::popTTIStack() {
-  TopTTI = 0;
-
-  // Walk up the chain and update the top TTI pointer.
-  for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI)
-    PTTI->TopTTI = PrevTTI;
-
-  PrevTTI = 0;
-}
-
 void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<TargetTransformInfo>();
 }
@@ -158,6 +148,16 @@ unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
   return PrevTTI->getIntImmCost(Imm, Ty);
 }
 
+unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx,
+                                            const APInt &Imm, Type *Ty) const {
+  return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty);
+}
+
+unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                                            const APInt &Imm, Type *Ty) const {
+  return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty);
+}
+
 unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
   return PrevTTI->getNumberOfRegisters(Vector);
 }
@@ -231,22 +231,23 @@ unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
 
 namespace {
 
-struct NoTTI : ImmutablePass, TargetTransformInfo {
+struct NoTTI final : ImmutablePass, TargetTransformInfo {
   const DataLayout *DL;
 
   NoTTI() : ImmutablePass(ID), DL(0) {
     initializeNoTTIPass(*PassRegistry::getPassRegistry());
   }
 
-  virtual void initializePass() {
+  virtual void initializePass() override {
     // Note that this subclass is special, and must *not* call initializeTTI as
     // it does not chain.
     TopTTI = this;
     PrevTTI = 0;
-    DL = getAnalysisIfAvailable<DataLayout>();
+    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+    DL = DLP ? &DLP->getDataLayout() : 0;
   }
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
     // Note that this subclass is special, and must *not* call
     // TTI::getAnalysisUsage as it breaks the recursion.
   }
@@ -255,13 +256,14 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
   static char ID;
 
   /// Provide necessary pointer adjustments for the two base classes.
-  virtual void *getAdjustedAnalysisPointer(const void *ID) {
+  virtual void *getAdjustedAnalysisPointer(const void *ID) override {
     if (ID == &TargetTransformInfo::ID)
       return (TargetTransformInfo*)this;
     return this;
   }
 
-  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const {
+  unsigned getOperationCost(unsigned Opcode, Type *Ty,
+                            Type *OpTy) const override {
     switch (Opcode) {
     default:
       // By default, just classify everything as 'basic'.
@@ -318,7 +320,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
   }
 
   unsigned getGEPCost(const Value *Ptr,
-                      ArrayRef<const Value *> Operands) const {
+                      ArrayRef<const Value *> Operands) const override {
     // In the basic model, we just assume that all-constant GEPs will be folded
     // into their uses via addressing modes.
     for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -328,7 +330,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return TCC_Free;
   }
 
-  unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const {
+  unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const override
+  {
     assert(FTy && "FunctionType must be provided to this routine.");
 
     // The target-independent implementation just measures the size of the
@@ -343,7 +346,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return TCC_Basic * (NumArgs + 1);
   }
 
-  unsigned getCallCost(const Function *F, int NumArgs = -1) const {
+  unsigned getCallCost(const Function *F, int NumArgs = -1) const override
+  {
     assert(F && "A concrete function must be provided to this routine.");
 
     if (NumArgs < 0)
@@ -364,7 +368,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
   }
 
   unsigned getCallCost(const Function *F,
-                       ArrayRef<const Value *> Arguments) const {
+                       ArrayRef<const Value *> Arguments) const override {
     // Simply delegate to generic handling of the call.
     // FIXME: We should use instsimplify or something else to catch calls which
     // will constant fold with these arguments.
@@ -372,7 +376,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys) const {
+                            ArrayRef<Type *> ParamTys) const override {
     switch (IID) {
     default:
       // Intrinsics rarely (if ever) have normal argument setup constraints.
@@ -394,8 +398,9 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     }
   }
 
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments) const {
+  unsigned
+  getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+                   ArrayRef<const Value *> Arguments) const override {
     // Delegate to the generic intrinsic handling code. This mostly provides an
     // opportunity for targets to (for example) special case the cost of
     // certain intrinsics based on constants used as arguments.
@@ -406,14 +411,14 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys);
   }
 
-  unsigned getUserCost(const User *U) const {
+  unsigned getUserCost(const User *U) const override {
     if (isa<PHINode>(U))
       return TCC_Free; // Model all PHI nodes as free.
 
-    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U))
-      // In the basic model we just assume that all-constant GEPs will be
-      // folded into their uses via addressing modes.
-      return GEP->hasAllConstantIndices() ? TCC_Free : TCC_Basic;
+    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
+      SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
+      return TopTTI->getGEPCost(GEP->getPointerOperand(), Indices);
+    }
 
     if (ImmutableCallSite CS = U) {
       const Function *F = CS.getCalledFunction();
@@ -423,12 +428,7 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
         return TopTTI->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
       }
 
-      SmallVector<const Value *, 8> Arguments;
-      for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(),
-                                           AE = CS.arg_end();
-           AI != AE; ++AI)
-        Arguments.push_back(*AI);
-
+      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
       return TopTTI->getCallCost(F, Arguments);
     }
 
@@ -446,9 +446,9 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
                                 U->getOperand(0)->getType() : 0);
   }
 
-  bool hasBranchDivergence() const { return false; }
+  bool hasBranchDivergence() const override { return false; }
 
-  bool isLoweredToCall(const Function *F) const {
+  bool isLoweredToCall(const Function *F) const override {
     // FIXME: These should almost certainly not be handled here, and instead
     // handled with the help of TLI or the target itself. This was largely
     // ported from existing analysis heuristics here so that such refactorings
@@ -479,126 +479,136 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
     return true;
   }
 
-  void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { }
+  void getUnrollingPreferences(Loop *, UnrollingPreferences &) const override {
+  }
 
-  bool isLegalAddImmediate(int64_t Imm) const {
+  bool isLegalAddImmediate(int64_t Imm) const override {
     return false;
   }
 
-  bool isLegalICmpImmediate(int64_t Imm) const {
+  bool isLegalICmpImmediate(int64_t Imm) const override {
     return false;
   }
 
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
-                             bool HasBaseReg, int64_t Scale) const {
+                             bool HasBaseReg, int64_t Scale) const override
+  {
     // Guess that reg+reg addressing is allowed. This heuristic is taken from
     // the implementation of LSR.
     return !BaseGV && BaseOffset == 0 && Scale <= 1;
   }
 
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
-                           bool HasBaseReg, int64_t Scale) const {
+                           bool HasBaseReg, int64_t Scale) const override {
     // Guess that all legal addressing mode are free.
     if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale))
       return 0;
     return -1;
   }
 
-
-  bool isTruncateFree(Type *Ty1, Type *Ty2) const {
+  bool isTruncateFree(Type *Ty1, Type *Ty2) const override {
     return false;
   }
 
-  bool isTypeLegal(Type *Ty) const {
+  bool isTypeLegal(Type *Ty) const override {
     return false;
   }
 
-  unsigned getJumpBufAlignment() const {
+  unsigned getJumpBufAlignment() const override {
     return 0;
   }
 
-  unsigned getJumpBufSize() const {
+  unsigned getJumpBufSize() const override {
     return 0;
   }
 
-  bool shouldBuildLookupTables() const {
+  bool shouldBuildLookupTables() const override {
     return true;
   }
 
-  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
+  PopcntSupportKind
+  getPopcntSupport(unsigned IntTyWidthInBit) const override {
     return PSK_Software;
   }
 
-  bool haveFastSqrt(Type *Ty) const {
+  bool haveFastSqrt(Type *Ty) const override {
     return false;
   }
 
-  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
-    return 1;
+  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override {
+    return TCC_Basic;
+  }
+
+  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                         Type *Ty) const override {
+    return TCC_Free;
+  }
+
+  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+                         Type *Ty) const override {
+    return TCC_Free;
   }
 
-  unsigned getNumberOfRegisters(bool Vector) const {
+  unsigned getNumberOfRegisters(bool Vector) const override {
     return 8;
   }
 
-  unsigned  getRegisterBitWidth(bool Vector) const {
+  unsigned  getRegisterBitWidth(bool Vector) const override {
     return 32;
   }
 
-  unsigned getMaximumUnrollFactor() const {
+  unsigned getMaximumUnrollFactor() const override {
     return 1;
   }
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
-                                  OperandValueKind) const {
+                                  OperandValueKind) const override {
     return 1;
   }
 
-  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
-                          int Index = 0, Type *SubTp = 0) const {
+  unsigned getShuffleCost(ShuffleKind Kind, Type *Ty,
+                          int Index = 0, Type *SubTp = 0) const override {
     return 1;
   }
 
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
-                            Type *Src) const {
+                            Type *Src) const override {
     return 1;
   }
 
-  unsigned getCFInstrCost(unsigned Opcode) const {
+  unsigned getCFInstrCost(unsigned Opcode) const override {
     return 1;
   }
 
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                              Type *CondTy = 0) const {
+                              Type *CondTy = 0) const override {
     return 1;
   }
 
   unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
-                              unsigned Index = -1) const {
+                              unsigned Index = -1) const override {
     return 1;
   }
 
-  unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
-                           unsigned Alignment,
-                           unsigned AddressSpace) const {
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                           unsigned AddressSpace) const override {
     return 1;
   }
 
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID,
-                                 Type *RetTy,
-                                 ArrayRef<Type*> Tys) const {
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+                                 ArrayRef<Type*> Tys) const override {
     return 1;
   }
 
-  unsigned getNumberOfParts(Type *Tp) const {
+  unsigned getNumberOfParts(Type *Tp) const override {
     return 0;
   }
 
-  unsigned getAddressComputationCost(Type *Tp, bool) const {
+  unsigned getAddressComputationCost(Type *Tp, bool) const override {
     return 0;
   }
 
-  unsigned getReductionCost(unsigned, Type *, bool) const {
+  unsigned getReductionCost(unsigned, Type *, bool) const override {
     return 1;
   }
 };
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 4c68322..5a1acc0 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -16,7 +16,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Trace.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -37,7 +36,7 @@ void Trace::print(raw_ostream &O) const {
   O << "; Trace from function " << F->getName() << ", blocks:\n";
   for (const_iterator i = begin(), e = end(); i != e; ++i) {
     O << "; ";
-    WriteAsOperand(O, *i, true, getModule());
+    (*i)->printAsOperand(O, true, getModule());
     O << "\n";
   }
   O << "; Trace parent function: \n" << *F;
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 6791d4b..05daf18 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -281,7 +281,7 @@ namespace {
       initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
     }
 
-    virtual void initializePass() {
+    void initializePass() override {
       InitializeAliasAnalysis(this);
     }
 
@@ -289,7 +289,7 @@ namespace {
     /// an analysis interface through multiple inheritance.  If needed, it
     /// should override this to adjust the this pointer as needed for the
     /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(const void *PI) {
+    void *getAdjustedAnalysisPointer(const void *PI) override {
       if (PI == &AliasAnalysis::ID)
         return (AliasAnalysis*)this;
       return this;
@@ -299,15 +299,15 @@ namespace {
     bool PathAliases(const MDNode *A, const MDNode *B) const;
 
   private:
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-    virtual AliasResult alias(const Location &LocA, const Location &LocB);
-    virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
-    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
-    virtual ModRefBehavior getModRefBehavior(const Function *F);
-    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
-                                       const Location &Loc);
-    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                                       ImmutableCallSite CS2);
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
+    AliasResult alias(const Location &LocA, const Location &LocB) override;
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
+    ModRefBehavior getModRefBehavior(const Function *F) override;
+    ModRefResult getModRefInfo(ImmutableCallSite CS,
+                               const Location &Loc) override;
+    ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                               ImmutableCallSite CS2) override;
   };
 }  // End of anonymous namespace
 
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index e39ee62..72617a0 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -16,8 +16,10 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
@@ -25,10 +27,8 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
-#include "llvm/Support/ConstantRange.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/PatternMatch.h"
 #include <cstring>
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -311,8 +311,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
   if (Argument *A = dyn_cast<Argument>(V)) {
     unsigned Align = 0;
 
-    if (A->hasByValAttr()) {
-      // Get alignment information off byval arguments if specified in the IR.
+    if (A->hasByValOrInAllocaAttr()) {
+      // Get alignment information off byval/inalloca arguments if specified in
+      // the IR.
       Align = A->getParamAlignment();
     } else if (TD && A->hasStructRetAttr()) {
       // An sret parameter has at least the ABI alignment of the return type.
@@ -1959,9 +1960,8 @@ llvm::GetUnderlyingObjects(Value *V,
 /// are lifetime markers.
 ///
 bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
-  for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
-       UI != UE; ++UI) {
-    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+  for (const User *U : V->users()) {
+    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
     if (!II) return false;
 
     if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
@@ -2006,7 +2006,9 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
   }
   case Instruction::Load: {
     const LoadInst *LI = cast<LoadInst>(Inst);
-    if (!LI->isUnordered())
+    if (!LI->isUnordered() ||
+        // Speculative load may create a race that did not exist in the source.
+        LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
       return false;
     return LI->getPointerOperand()->isDereferenceablePointer();
   }
@@ -2033,6 +2035,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
        case Intrinsic::umul_with_overflow:
        case Intrinsic::usub_with_overflow:
          return true;
+       // Sqrt should be OK, since the llvm sqrt intrinsic isn't defined to set
+       // errno like libm sqrt would.
+       case Intrinsic::sqrt:
+       case Intrinsic::fma:
+       case Intrinsic::fmuladd:
+         return true;
        // TODO: some fp intrinsics are marked as having the same error handling
        // as libm. They're safe to speculate when they won't error.
        // TODO: are convert_{from,to}_fp16 safe?
@@ -2068,9 +2076,9 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
   // Alloca never returns null, malloc might.
   if (isa<AllocaInst>(V)) return true;
 
-  // A byval argument is never null.
+  // A byval or inalloca argument is never null.
   if (const Argument *A = dyn_cast<Argument>(V))
-    return A->hasByValAttr();
+    return A->hasByValOrInAllocaAttr();
 
   // Global values are not null unless extern weak.
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
author	Stephen Hines <srhines@google.com>	2014-04-23 16:57:46 -0700
committer	Stephen Hines <srhines@google.com>	2014-04-24 15:53:16 -0700
commit	36b56886974eae4f9c5ebc96befd3e7bfe5de338 (patch)
tree	e6cfb69fbbd937f450eeb83bfb83b9da3b01275a /lib/Analysis
parent	69a8640022b04415ae9fac62f8ab090601d8f889 (diff)
download	external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.zip external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.gz external_llvm-36b56886974eae4f9c5ebc96befd3e7bfe5de338.tar.bz2