| field | value | date |
|---|---|---|
| author | Stephen Hines <srhines@google.com> | 2012-09-13 19:09:19 -0700 |
| committer | Android Git Automerger <android-git-automerger@android.com> | 2012-09-13 19:09:19 -0700 |
| commit | 78c041bd883d86c81c42b98f326660277e6d0d9a | |
| tree | 52800183ec2d22164b8f396842142c3a8aab912a /lib/Transforms | |
| parent | 828ded66831c0caaeecd2291a6bfb084f373d0e4 | |
| parent | 1c4ad5ef4fab105f0c8af7edd026e00502fb6279 | |
| download | external_llvm-78c041bd883d86c81c42b98f326660277e6d0d9a.zip external_llvm-78c041bd883d86c81c42b98f326660277e6d0d9a.tar.gz external_llvm-78c041bd883d86c81c42b98f326660277e6d0d9a.tar.bz2 | |
am 1c4ad5ef: Merge branch 'upstream' into merge-2012_09_10
* commit '1c4ad5ef4fab105f0c8af7edd026e00502fb6279': (446 commits)
Revert r163556. Missed updates to tablegen files.
Update function names to conform to guidelines. No functional change intended.
test/CodeGen/X86/ms-inline-asm.ll: Relax for non-darwin x86 targets. '##InlineAsm' could not be seen in other hosts.
[ms-inline asm] Properly emit the asm directives when the AsmPrinterVariant and InlineAsmVariant don't match.
Update test case for Release builds.
Remove redundant semicolons which are null statements.
Disable stack coloring because it makes dragonegg fail bootstrapping.
[ms-inline asm] Pass the correct AsmVariant to the PrintAsmOperand() function and update the printOperand() function accordingly.
[ms-inline asm] Add support for .att_syntax directive.
Enable stack coloring.
Don't attempt to use flags from predicated instructions.
[Object] Extract Elf_Ehdr. Patch by Hemant Kulkarni!
Stack Coloring: Handle the case where END markers come before BEGIN markers properly.
Enhance PR11334 fix to support extload from v2f32/v4f32
Add "blocked" heuristic to the Hexagon MI scheduler.
Fold multiply by 0 or 1 when in UnsafeFPMath mode in SelectionDAG::getNode().
whitespace
Add boolean simplification support from CMOV
Fix an assertion failure when optimising a shufflevector incorrectly into concat_vectors, and a followup bug with SelectionDAG::getNode() creating nodes with invalid types.
Minor cleanup. No functional change.
...
Diffstat (limited to 'lib/Transforms')
37 files changed, 1498 insertions, 540 deletions
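Editor's note: the unifying change in the diff below is that deadness- and allocation-related utility predicates (isInstructionTriviallyDead, isAllocationFn, isFreeCall, getObjectSize, and friends) now take a TargetLibraryInfo parameter so they can recognize library calls such as malloc and free. A minimal, hypothetical sketch of that pattern against the LLVM 3.2-era C++ API (the pass name and registration details are illustrative, not part of the patch):

```cpp
// Hypothetical pass showing the TLI-threading pattern used in this patch:
// fetch TargetLibraryInfo once (tolerating its absence), then hand it to
// the utility predicates so unused library allocator calls count as dead.
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

namespace {
struct MyDeadCodePass : public FunctionPass {  // illustrative name
  static char ID;
  MyDeadCodePass() : FunctionPass(ID) {}

  virtual bool runOnFunction(Function &F) {
    // Optional analysis: the predicates accept a null TLI.
    TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
    bool Changed = false;
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ) {
      Instruction *Inst = &*I++;  // advance before erasing
      // With TLI, unused calls to known library allocators (e.g. malloc)
      // are also treated as trivially dead.
      if (isInstructionTriviallyDead(Inst, TLI)) {
        Inst->eraseFromParent();
        Changed = true;
      }
    }
    return Changed;
  }
};
}
char MyDeadCodePass::ID = 0;
```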
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 6d950d2..b888e95 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -346,7 +346,7 @@ static bool isLeakCheckerRoot(GlobalVariable *GV) {
 /// Given a value that is stored to a global but never read, determine whether
 /// it's safe to remove the store and the chain of computation that feeds the
 /// store.
-static bool IsSafeComputationToRemove(Value *V) {
+static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) {
   do {
     if (isa<Constant>(V))
       return true;
@@ -355,7 +355,7 @@
     if (isa<LoadInst>(V) || isa<InvokeInst>(V) ||
         isa<Argument>(V) || isa<GlobalValue>(V))
       return false;
-    if (isAllocationFn(V))
+    if (isAllocationFn(V, TLI))
       return true;

     Instruction *I = cast<Instruction>(V);
@@ -376,7 +376,8 @@
 /// of the global and clean up any that obviously don't assign the global a
 /// value that isn't dynamically allocated.
 ///
-static bool CleanupPointerRootUsers(GlobalVariable *GV) {
+static bool CleanupPointerRootUsers(GlobalVariable *GV,
+                                    const TargetLibraryInfo *TLI) {
   // A brief explanation of leak checkers.  The goal is to find bugs where
   // pointers are forgotten, causing an accumulating growth in memory
   // usage over time. The common strategy for leak checkers is to whitelist the
@@ -432,18 +433,18 @@
         C->destroyConstant();
         // This could have invalidated UI, start over from scratch.
         Dead.clear();
-        CleanupPointerRootUsers(GV);
+        CleanupPointerRootUsers(GV, TLI);
         return true;
       }
     }
   }

   for (int i = 0, e = Dead.size(); i != e; ++i) {
-    if (IsSafeComputationToRemove(Dead[i].first)) {
+    if (IsSafeComputationToRemove(Dead[i].first, TLI)) {
       Dead[i].second->eraseFromParent();
       Instruction *I = Dead[i].first;
       do {
-        if (isAllocationFn(I))
+        if (isAllocationFn(I, TLI))
           break;
         Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
         if (!J)
@@ -975,7 +976,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
   // nor is the global.
   if (AllNonStoreUsesGone) {
     if (isLeakCheckerRoot(GV)) {
-      Changed |= CleanupPointerRootUsers(GV);
+      Changed |= CleanupPointerRootUsers(GV, TLI);
     } else {
       Changed = true;
       CleanupConstantGlobalUsers(GV, 0, TD, TLI);
@@ -1465,9 +1466,10 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
 /// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
 /// it up into multiple allocations of arrays of the fields.
 static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
-                                            Value *NElems, TargetData *TD) {
+                                            Value *NElems, TargetData *TD,
+                                            const TargetLibraryInfo *TLI) {
   DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
-  Type *MAT = getMallocAllocatedType(CI);
+  Type *MAT = getMallocAllocatedType(CI, TLI);
   StructType *STy = cast<StructType>(MAT);

   // There is guaranteed to be at least one use of the malloc (storing
@@ -1688,7 +1690,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
   // This eliminates dynamic allocation, avoids an indirection accessing the
   // data, and exposes the resultant global to further GlobalOpt.
   // We cannot optimize the malloc if we cannot determine malloc array size.
-  Value *NElems = getMallocArraySize(CI, TD, true);
+  Value *NElems = getMallocArraySize(CI, TD, TLI, true);
   if (!NElems)
     return false;
@@ -1725,7 +1727,7 @@
   // If this is a fixed size array, transform the Malloc to be an alloc of
   // structs.  malloc [100 x struct],1 -> malloc struct, 100
-  if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
+  if (ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI, TLI))) {
     Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
     unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
     Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
@@ -1742,7 +1744,8 @@
     CI = cast<CallInst>(Malloc);
   }

-  GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD);
+  GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, TLI, true),
+                             TD, TLI);
   return true;
 }
@@ -1771,8 +1774,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
     // Optimize away any trapping uses of the loaded value.
     if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
       return true;
-  } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
-    Type *MallocType = getMallocAllocatedType(CI);
+  } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) {
+    Type *MallocType = getMallocAllocatedType(CI, TLI);
     if (MallocType &&
         TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
                                            TD, TLI))
@@ -1964,7 +1967,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
   bool Changed;
   if (isLeakCheckerRoot(GV)) {
     // Delete any constant stores to the global.
-    Changed = CleanupPointerRootUsers(GV);
+    Changed = CleanupPointerRootUsers(GV, TLI);
   } else {
     // Delete any stores we can find to the global.  We may not be able to
     // make it completely dead though.
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 712888a..69a22fb 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -339,6 +340,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+  const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();

   SmallPtrSet<Function*, 8> SCCFunctions;
   DEBUG(dbgs() << "Inliner visiting SCC:");
@@ -417,7 +419,7 @@
       // just delete the call instead of trying to inline it, regardless of
       // size.  This happens because IPSCCP propagates the result out of the
       // call and then we're left with the dead call.
-      if (isInstructionTriviallyDead(CS.getInstruction())) {
+      if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) {
         DEBUG(dbgs() << "    -> Deleting dead call: "
                      << *CS.getInstruction() << "\n");
         // Update the call graph by deleting the edge from Callee to Caller.
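Editor's illustration (not from the patch): the leak-checker comment in the GlobalOpt hunk above concerns globals like the following, where a heap pointer is stored but never read back. Deleting the store and the malloc feeding it is only done once isLeakCheckerRoot rules the global out. A hypothetical C++ input:

```cpp
#include <cstdlib>

// A global pointer that is only ever written.  GlobalOpt's
// CleanupPointerRootUsers may delete the store and the malloc feeding it,
// but a leak checker that treats 'cache' as a root would then report the
// allocation as leaked -- hence the isLeakCheckerRoot() guard.
static void *cache;

void fill() {
  cache = std::malloc(128);  // computation chain feeding only a dead store
}
```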
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cbe1ca4..b12fc01 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -168,7 +168,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
 /// the heavy lifting.
 ///
 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
-  if (isFreeCall(&CI))
+  if (isFreeCall(&CI, TLI))
     return visitFree(CI);

   // If the caller function is nounwind, mark the call as nounwind, even if the
@@ -243,7 +243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   default: break;
   case Intrinsic::objectsize: {
     uint64_t Size;
-    if (getObjectSize(II->getArgOperand(0), Size, TD))
+    if (getObjectSize(II->getArgOperand(0), Size, TD, TLI))
       return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
     return 0;
   }
@@ -877,7 +877,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
 // visitCallSite - Improvements for call and invoke instructions.
 //
 Instruction *InstCombiner::visitCallSite(CallSite CS) {
-  if (isAllocLikeFn(CS.getInstruction()))
+  if (isAllocLikeFn(CS.getInstruction(), TLI))
     return visitAllocSite(*CS.getInstruction());

   bool Changed = false;
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 35a0bbb..2a7182f 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -462,6 +462,16 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
     }
   }

+  // (x lshr C1) udiv C2 --> x udiv (C2 << C1)
+  if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
+    Value *X;
+    ConstantInt *C1;
+    if (match(Op0, m_LShr(m_Value(X), m_ConstantInt(C1)))) {
+      APInt NC = C2->getValue().shl(C1->getLimitedValue(C1->getBitWidth()-1));
+      return BinaryOperator::CreateUDiv(X, Builder->getInt(NC));
+    }
+  }
+
   // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
   { const APInt *CI; Value *N;
     if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 68ecd51..ff758c4 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1068,7 +1068,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // If the bitcast is of an allocation, and the allocation will be
       // converted to match the type of the cast, don't touch this.
       if (isa<AllocaInst>(BCI->getOperand(0)) ||
-          isAllocationFn(BCI->getOperand(0))) {
+          isAllocationFn(BCI->getOperand(0), TLI)) {
         // See if the bitcast simplifies, if so, don't nuke this GEP yet.
         if (Instruction *I = visitBitCast(*BCI)) {
           if (I != BCI) {
@@ -1107,7 +1107,8 @@

 static bool
-isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) {
+isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
+                     const TargetLibraryInfo *TLI) {
   SmallVector<Instruction*, 4> Worklist;
   Worklist.push_back(AI);
@@ -1163,7 +1164,7 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users) {
       }
     }

-    if (isFreeCall(I)) {
+    if (isFreeCall(I, TLI)) {
       Users.push_back(I);
       continue;
     }
@@ -1188,7 +1189,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
   // to null and free calls, delete the calls and replace the comparisons with
   // true or false as appropriate.
   SmallVector<WeakVH, 64> Users;
-  if (isAllocSiteRemovable(&MI, Users)) {
+  if (isAllocSiteRemovable(&MI, Users, TLI)) {
     for (unsigned i = 0, e = Users.size(); i != e; ++i) {
       Instruction *I = cast_or_null<Instruction>(&*Users[i]);
       if (!I) continue;
@@ -1872,7 +1873,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
       Instruction *Inst = BBI++;

       // DCE instruction if trivially dead.
-      if (isInstructionTriviallyDead(Inst)) {
+      if (isInstructionTriviallyDead(Inst, TLI)) {
         ++NumDeadInst;
         DEBUG(errs() << "IC: DCE: " << *Inst << '\n');
         Inst->eraseFromParent();
@@ -2002,7 +2003,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
     if (I == 0) continue;  // skip null values.

     // Check to see if we can DCE the instruction.
-    if (isInstructionTriviallyDead(I)) {
+    if (isInstructionTriviallyDead(I, TLI)) {
       DEBUG(errs() << "IC: DCE: " << *I << '\n');
       EraseInstFromFunction(*I);
       ++NumDeadInst;
@@ -2102,7 +2103,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {

     // If the instruction was modified, it's possible that it is now dead.
     // if so, remove it.
-    if (isInstructionTriviallyDead(I)) {
+    if (isInstructionTriviallyDead(I, TLI)) {
       EraseInstFromFunction(*I);
     } else {
       Worklist.Add(I);
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 06f4d2f..0775cf4 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -15,7 +15,7 @@

 #define DEBUG_TYPE "asan"

-#include "FunctionBlackList.h"
+#include "BlackList.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/InlineAsm.h"
@@ -217,7 +217,7 @@ struct AddressSanitizer : public ModulePass {
   Function *AsanCtorFunction;
   Function *AsanInitFunction;
   Instruction *CtorInsertBefore;
-  OwningPtr<FunctionBlackList> BL;
+  OwningPtr<BlackList> BL;
   // This array is indexed by AccessIsWrite and log2(AccessSize).
   Function *AsanErrorCallback[2][kNumberOfAccessSizes];
   InlineAsm *EmptyAsm;
@@ -544,6 +544,7 @@ bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
   Type *Ty = cast<PointerType>(G->getType())->getElementType();
   DEBUG(dbgs() << "GLOBAL: " << *G);

+  if (BL->isIn(*G)) return false;
   if (!Ty->isSized()) return false;
   if (!G->hasInitializer()) return false;
   // Touch only those globals that will not be defined in other modules.
@@ -643,6 +644,8 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
     Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
     // Determine whether this global should be poisoned in initialization.
     bool GlobalHasDynamicInitializer = HasDynamicInitializer(G);
+    // Don't check initialization order if this global is blacklisted.
+    GlobalHasDynamicInitializer &= !BL->isInInit(*G);

     StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
     Constant *NewInitializer = ConstantStruct::get(
@@ -736,7 +739,7 @@ bool AddressSanitizer::runOnModule(Module &M) {
   TD = getAnalysisIfAvailable<TargetData>();
   if (!TD)
     return false;
-  BL.reset(new FunctionBlackList(ClBlackListFile));
+  BL.reset(new BlackList(ClBlackListFile));

   C = &(M.getContext());
   LongSize = TD->getPointerSizeInBits();
@@ -774,7 +777,7 @@
                             /*hasSideEffects=*/true);

   llvm::Triple targetTriple(M.getTargetTriple());
-  bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::ANDROIDEABI;
+  bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android;

   MappingOffset = isAndroid ?
       kDefaultShadowOffsetAndroid :
       (LongSize == 32 ? kDefaultShadowOffset32 : kDefaultShadowOffset64);
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
new file mode 100644
index 0000000..2cb1199
--- /dev/null
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -0,0 +1,102 @@
+//===-- BlackList.cpp - blacklist for sanitizers --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables based on a user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+
+#include <utility>
+#include <string>
+
+#include "BlackList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+BlackList::BlackList(const StringRef Path) {
+  // Validate and open blacklist file.
+  if (!Path.size()) return;
+  OwningPtr<MemoryBuffer> File;
+  if (error_code EC = MemoryBuffer::getFile(Path, File)) {
+    report_fatal_error("Can't open blacklist file: " + Path + ": " +
+                       EC.message());
+  }
+
+  // Iterate through each line in the blacklist file.
+  SmallVector<StringRef, 16> Lines;
+  SplitString(File.take()->getBuffer(), Lines, "\n\r");
+  StringMap<std::string> Regexps;
+  for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end();
+       I != E; ++I) {
+    // Get our prefix and unparsed regexp.
+    std::pair<StringRef, StringRef> SplitLine = I->split(":");
+    StringRef Prefix = SplitLine.first;
+    std::string Regexp = SplitLine.second;
+
+    // Replace * with .*
+    for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+         pos += strlen(".*")) {
+      Regexp.replace(pos, strlen("*"), ".*");
+    }
+
+    // Check that the regexp is valid.
+    Regex CheckRE(Regexp);
+    std::string Error;
+    if (!CheckRE.isValid(Error)) {
+      report_fatal_error("malformed blacklist regex: " + SplitLine.second +
+                         ": " + Error);
+    }
+
+    // Add this regexp into the proper group by its prefix.
+    if (Regexps[Prefix].size())
+      Regexps[Prefix] += "|";
+    Regexps[Prefix] += Regexp;
+  }
+
+  // Iterate through each of the prefixes, and create Regexs for them.
+  for (StringMap<std::string>::iterator I = Regexps.begin(), E = Regexps.end();
+       I != E; ++I) {
+    Entries[I->getKey()] = new Regex(I->getValue());
+  }
+}
+
+bool BlackList::isIn(const Function &F) {
+  return isIn(*F.getParent()) || inSection("fun", F.getName());
+}
+
+bool BlackList::isIn(const GlobalVariable &G) {
+  return isIn(*G.getParent()) || inSection("global", G.getName());
+}
+
+bool BlackList::isIn(const Module &M) {
+  return inSection("src", M.getModuleIdentifier());
+}
+
+bool BlackList::isInInit(const GlobalVariable &G) {
+  return isIn(*G.getParent()) || inSection("global-init", G.getName());
+}
+
+bool BlackList::inSection(const StringRef Section,
+                          const StringRef Query) {
+  Regex *FunctionRegex = Entries[Section];
+  return FunctionRegex ?
+      FunctionRegex->match(Query) : false;
+}
+
+}  // namespace llvm
diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h
new file mode 100644
index 0000000..73977fc
--- /dev/null
+++ b/lib/Transforms/Instrumentation/BlackList.h
@@ -0,0 +1,55 @@
+//===-- BlackList.h - blacklist for sanitizers ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions or global
+// variables based on a user-supplied blacklist.
+//
+// The blacklist disables instrumentation of various functions and global
+// variables.  Each line contains a prefix, followed by a wild card expression.
+// ---
+// fun:*_ZN4base6subtle*
+// global:*global_with_bad_access_or_initialization*
+// global-init:*global_with_initialization_issues*
+// src:file_with_tricky_code.cc
+// ---
+// Note that the wild card is in fact an llvm::Regex, but * is automatically
+// replaced with .*
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+class Function;
+class GlobalVariable;
+class Module;
+class Regex;
+class StringRef;
+
+class BlackList {
+ public:
+  BlackList(const StringRef Path);
+  // Returns whether either this function or it's source file are blacklisted.
+  bool isIn(const Function &F);
+  // Returns whether either this global or it's source file are blacklisted.
+  bool isIn(const GlobalVariable &G);
+  // Returns whether this module is blacklisted by filename.
+  bool isIn(const Module &M);
+  // Returns whether a global should be excluded from initialization checking.
+  bool isInInit(const GlobalVariable &G);
+ private:
+  StringMap<Regex*> Entries;
+
+  bool inSection(const StringRef Section, const StringRef Query);
+};
+
+}  // namespace llvm
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 09e0f14..6429081 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Support/TargetFolder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Instrumentation.h"
 using namespace llvm;
@@ -48,10 +49,12 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<TargetData>();
+      AU.addRequired<TargetLibraryInfo>();
     }

   private:
     const TargetData *TD;
+    const TargetLibraryInfo *TLI;
     ObjectSizeOffsetEvaluator *ObjSizeEval;
     BuilderTy *Builder;
     Instruction *Inst;
@@ -166,11 +169,12 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
 bool BoundsChecking::runOnFunction(Function &F) {
   TD = &getAnalysis<TargetData>();
+  TLI = &getAnalysis<TargetLibraryInfo>();

   TrapBB = 0;
   BuilderTy TheBuilder(F.getContext(), TargetFolder(TD));
   Builder = &TheBuilder;
-  ObjectSizeOffsetEvaluator TheObjSizeEval(TD, F.getContext());
+  ObjectSizeOffsetEvaluator TheObjSizeEval(TD, TLI, F.getContext());
   ObjSizeEval = &TheObjSizeEval;

   // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 00de882..058f68c 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,8 +1,8 @@
 add_llvm_library(LLVMInstrumentation
   AddressSanitizer.cpp
+  BlackList.cpp
   BoundsChecking.cpp
   EdgeProfiling.cpp
-  FunctionBlackList.cpp
   GCOVProfiling.cpp
   Instrumentation.cpp
   OptimalEdgeProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
deleted file mode 100644
index 188ea4d..0000000
--- a/lib/Transforms/Instrumentation/FunctionBlackList.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-//===-- FunctionBlackList.cpp - blacklist of functions --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions based on
-// user-supplied blacklist.
-//
-//===----------------------------------------------------------------------===//
-
-#include "FunctionBlackList.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Function.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Regex.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-
-namespace llvm {
-
-FunctionBlackList::FunctionBlackList(const std::string &Path) {
-  Functions = NULL;
-  const char *kFunPrefix = "fun:";
-  if (!Path.size()) return;
-  std::string Fun;
-
-  OwningPtr<MemoryBuffer> File;
-  if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) {
-    report_fatal_error("Can't open blacklist file " + Path + ": " +
-                       EC.message());
-  }
-  MemoryBuffer *Buff = File.take();
-  const char *Data = Buff->getBufferStart();
-  size_t DataLen = Buff->getBufferSize();
-  SmallVector<StringRef, 16> Lines;
-  SplitString(StringRef(Data, DataLen), Lines, "\n\r");
-  for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
-    if (Lines[i].startswith(kFunPrefix)) {
-      std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
-      std::string ThisFuncRE;
-      // add ThisFunc replacing * with .*
-      for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
-        if (ThisFunc[j] == '*')
-          ThisFuncRE += '.';
-        ThisFuncRE += ThisFunc[j];
-      }
-      // Check that the regexp is valid.
-      Regex CheckRE(ThisFuncRE);
-      std::string Error;
-      if (!CheckRE.isValid(Error))
-        report_fatal_error("malformed blacklist regex: " + ThisFunc +
-                           ": " + Error);
-      // Append to the final regexp.
-      if (Fun.size())
-        Fun += "|";
-      Fun += ThisFuncRE;
-    }
-  }
-  if (Fun.size()) {
-    Functions = new Regex(Fun);
-  }
-}
-
-bool FunctionBlackList::isIn(const Function &F) {
-  if (Functions) {
-    bool Res = Functions->match(F.getName());
-    return Res;
-  }
-  return false;
-}
-
-}  // namespace llvm
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h
deleted file mode 100644
index c1239b9..0000000
--- a/lib/Transforms/Instrumentation/FunctionBlackList.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- FunctionBlackList.cpp - blacklist of functions ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//===----------------------------------------------------------------------===//
-//
-// This is a utility class for instrumentation passes (like AddressSanitizer
-// or ThreadSanitizer) to avoid instrumenting some functions based on
-// user-supplied blacklist.
-//
-//===----------------------------------------------------------------------===//
-//
-
-#include <string>
-
-namespace llvm {
-class Function;
-class Regex;
-
-// Blacklisted functions are not instrumented.
-// The blacklist file contains one or more lines like this:
-// ---
-// fun:FunctionWildCard
-// ---
-// This is similar to the "ignore" feature of ThreadSanitizer.
-// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
-class FunctionBlackList {
- public:
-  FunctionBlackList(const std::string &Path);
-  bool isIn(const Function &F);
- private:
-  Regex *Functions;
-};
-
-}  // namespace llvm
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 264a6a6..9fcde31 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -88,11 +88,10 @@ namespace {

     // Add the function to write out all our counters to the global destructor
     // list.
-    void insertCounterWriteout(SmallVector<std::pair<GlobalVariable *,
-                                                     MDNode *>, 8> &);
+    void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >);
     void insertIndirectCounterIncrement();

-    std::string mangleName(DICompileUnit CU, std::string NewStem);
+    std::string mangleName(DICompileUnit CU, const char *NewStem);

     bool EmitNotes;
     bool EmitData;
@@ -329,7 +328,7 @@
   };
 }

-std::string GCOVProfiler::mangleName(DICompileUnit CU, std::string NewStem) {
+std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) {
   if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
     for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
       MDNode *N = GCov->getOperand(i);
@@ -630,7 +629,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() {
 }

 void GCOVProfiler::insertCounterWriteout(
-    SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> &CountersBySP) {
+    ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) {
   FunctionType *WriteoutFTy =
       FunctionType::get(Type::getVoidTy(*Ctx), false);
   Function *WriteoutF = Function::Create(WriteoutFTy,
@@ -652,7 +651,7 @@ void GCOVProfiler::insertCounterWriteout(
     std::string FilenameGcda = mangleName(compile_unit, "gcda");
     Builder.CreateCall(StartFile,
                        Builder.CreateGlobalStringPtr(FilenameGcda));
-    for (SmallVector<std::pair<GlobalVariable *, MDNode *>, 8>::iterator
+    for (ArrayRef<std::pair<GlobalVariable *, MDNode *> >::iterator
            I = CountersBySP.begin(), E = CountersBySP.end();
          I != E; ++I) {
       DISubprogram SP(I->second);
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index dc0fa71..17b7775 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -21,7 +21,7 @@

 #define DEBUG_TYPE "tsan"

-#include "FunctionBlackList.h"
+#include "BlackList.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Intrinsics.h"
@@ -50,7 +50,7 @@ static cl::opt<std::string>  ClBlackListFile("tsan-blacklist",
 STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
-STATISTIC(NumOmittedReadsBeforeWrite, 
+STATISTIC(NumOmittedReadsBeforeWrite,
           "Number of reads ignored due to following writes");
 STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size");
 STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes");
@@ -77,7 +77,7 @@ struct ThreadSanitizer : public FunctionPass {
   int getMemoryAccessFuncIndex(Value *Addr);

   TargetData *TD;
-  OwningPtr<FunctionBlackList> BL;
+  OwningPtr<BlackList> BL;
   IntegerType *OrdTy;
   // Callbacks to run-time library are computed in doInitialization.
   Function *TsanFuncEntry;
@@ -121,7 +121,7 @@ bool ThreadSanitizer::doInitialization(Module &M) {
   TD = getAnalysisIfAvailable<TargetData>();
   if (!TD)
     return false;
-  BL.reset(new FunctionBlackList(ClBlackListFile));
+  BL.reset(new BlackList(ClBlackListFile));

   // Always insert a call to __tsan_init into the module's CTORs.
   IRBuilder<> IRB(M.getContext());
@@ -186,7 +186,7 @@ bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
       NumOmittedReadsFromConstantGlobals++;
       return true;
     }
-  } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) {
+  } else if (LoadInst *L = dyn_cast<LoadInst>(Addr)) {
     if (isVtableAccess(L)) {
       // Reads from a vtable pointer can not race with any writes.
       NumOmittedReadsFromVtable++;
@@ -344,7 +344,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
     case NotAtomic: assert(false);
     case Unordered:              // Fall-through.
     case Monotonic:              v = 1 << 0; break;
-    // case Consume:                v = 1 << 1; break;  // Not specified yet.
+    // case Consume:             v = 1 << 1; break;  // Not specified yet.
     case Acquire:                v = 1 << 2; break;
     case Release:                v = 1 << 3; break;
     case AcquireRelease:         v = 1 << 4; break;
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index a8deda8..5912107 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   PFI = getAnalysisIfAvailable<ProfileInfo>();
   OptSize = F.hasFnAttr(Attribute::OptimizeForSize);

-  // First pass, eliminate blocks that contain only PHI nodes and an
+  /// This optimization identifies DIV instructions that can be
+  /// profitably bypassed and carried out with a shorter, faster divide.
+  if (TLI && TLI->isSlowDivBypassed()) {
+    const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes();
+
+    for (Function::iterator I = F.begin(); I != F.end(); I++) {
+      EverMadeChange |= bypassSlowDivision(F,
+                                           I,
+                                           BypassTypeMap);
+    }
+  }
+
+  // Eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
   EverMadeChange |= EliminateMostlyEmptyBlocks(F);

@@ -988,7 +1001,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     WeakVH IterHandle(CurInstIterator);
     BasicBlock *BB = CurInstIterator->getParent();

-    RecursivelyDeleteTriviallyDeadInstructions(Repl);
+    RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);

     if (IterHandle != CurInstIterator) {
       // If the iterator instruction was recursively deleted, start over at the
@@ -1174,17 +1187,32 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
 }

+/// If we have a SelectInst that will likely profit from branch prediction,
+/// turn it into a branch.
 bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
-  // If we have a SelectInst that will likely profit from branch prediction,
-  // turn it into a branch.
-  if (DisableSelectToBranch || OptSize || !TLI ||
-      !TLI->isPredictableSelectExpensive())
-    return false;
+  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);

-  if (!SI->getCondition()->getType()->isIntegerTy(1) ||
-      !isFormingBranchFromSelectProfitable(SI))
+  // Can we convert the 'select' to CF ?
+  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
     return false;

+  TargetLowering::SelectSupportKind SelectKind;
+  if (VectorCond)
+    SelectKind = TargetLowering::VectorMaskSelect;
+  else if (SI->getType()->isVectorTy())
+    SelectKind = TargetLowering::ScalarCondVectorVal;
+  else
+    SelectKind = TargetLowering::ScalarValSelect;
+
+  // Do we have efficient codegen support for this kind of 'selects' ?
+  if (TLI->isSelectSupported(SelectKind)) {
+    // We have efficient codegen support for the select instruction.
+    // Check if it is profitable to keep this 'select'.
+    if (!TLI->isPredictableSelectExpensive() ||
+        !isFormingBranchFromSelectProfitable(SI))
+      return false;
+  }
+
   ModifiedDT = true;

   // First, we split the block containing the select into 2 blocks.
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8dbcc23..086f0a1 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Instruction.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -38,10 +39,11 @@ namespace {
       initializeDeadInstEliminationPass(*PassRegistry::getPassRegistry());
     }
     virtual bool runOnBasicBlock(BasicBlock &BB) {
+      TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
       bool Changed = false;
      for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
         Instruction *Inst = DI++;
-        if (isInstructionTriviallyDead(Inst)) {
+        if (isInstructionTriviallyDead(Inst, TLI)) {
           Inst->eraseFromParent();
           Changed = true;
           ++DIEEliminated;
@@ -87,6 +89,8 @@
 char DCE::ID = 0;
 INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false)

 bool DCE::runOnFunction(Function &F) {
+  TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
+
   // Start out with all of the instructions in the worklist...
   std::vector<Instruction*> WorkList;
   for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i)
@@ -101,7 +105,7 @@ bool DCE::runOnFunction(Function &F) {
     Instruction *I = WorkList.back();
     WorkList.pop_back();

-    if (isInstructionTriviallyDead(I)) {       // If the instruction is dead.
+    if (isInstructionTriviallyDead(I, TLI)) {  // If the instruction is dead.
       // Loop over all of the values that the instruction uses, if there are
       // instructions being used, add them to the worklist, because they might
       // go dead after this one is removed.
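Editor's illustration (not from the patch): the new InstCombine rule a few files above rewrites (x lshr C1) udiv C2 into x udiv (C2 << C1). A quick self-contained check of the identity, which holds as long as C2 << C1 does not overflow the integer width:

```cpp
// Verifies:  (x >> C1) / C2  ==  x / (C2 << C1)  for unsigned x.
// Dropping the low C1 bits before dividing is the same as dividing by a
// 2^C1-times-larger constant, provided C2 << C1 does not overflow.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 2, C2 = 5;  // e.g. (x >> 2) / 5  ==  x / 20
  for (uint32_t x = 0; x < 100000; ++x)
    assert((x >> C1) / C2 == x / (C2 << C1));
  return 0;
}
```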
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 8b1283f..1ff4329 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -106,6 +106,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
 ///
 static void DeleteDeadInstruction(Instruction *I,
                                   MemoryDependenceAnalysis &MD,
+                                  const TargetLibraryInfo *TLI,
                                   SmallSetVector<Value*, 16> *ValueSet = 0) {
   SmallVector<Instruction*, 32> NowDeadInsts;
@@ -130,7 +131,7 @@ static void DeleteDeadInstruction(Instruction *I,
       if (!Op->use_empty()) continue;

       if (Instruction *OpI = dyn_cast<Instruction>(Op))
-        if (isInstructionTriviallyDead(OpI))
+        if (isInstructionTriviallyDead(OpI, TLI))
           NowDeadInsts.push_back(OpI);
     }
@@ -276,7 +277,7 @@ static Value *getStoredPointerOperand(Instruction *I) {

 static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
   uint64_t Size;
-  if (getObjectSize(V, Size, AA.getTargetData()))
+  if (getObjectSize(V, Size, AA.getTargetData(), AA.getTargetLibraryInfo()))
     return Size;
   return AliasAnalysis::UnknownSize;
 }
@@ -454,7 +455,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     Instruction *Inst = BBI++;

     // Handle 'free' calls specially.
-    if (CallInst *F = isFreeCall(Inst)) {
+    if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
       MadeChange |= HandleFree(F);
       continue;
     }
@@ -483,7 +484,7 @@
         // in case we need it.
         WeakVH NextInst(BBI);

-        DeleteDeadInstruction(SI, *MD);
+        DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo());

         if (NextInst == 0)  // Next instruction deleted.
           BBI = BB.begin();
@@ -530,7 +531,7 @@
               << *DepWrite << "\n  KILLER: " << *Inst << '\n');

         // Delete the store and now-dead instructions that feed it.
-        DeleteDeadInstruction(DepWrite, *MD);
+        DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo());
         ++NumFastStores;
         MadeChange = true;
@@ -640,7 +641,7 @@ bool DSE::HandleFree(CallInst *F) {
       Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));

       // DCE instructions only used to calculate that store
-      DeleteDeadInstruction(Dependency, *MD);
+      DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo());
       ++NumFastStores;
       MadeChange = true;
@@ -680,7 +681,8 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     // Okay, so these are dead heap objects, but if the pointer never escapes
     // then it's leaked by this function anyways.
-    else if (isAllocLikeFn(I) && !PointerMayBeCaptured(I, true, true))
+    else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) &&
+             !PointerMayBeCaptured(I, true, true))
       DeadStackObjects.insert(I);
   }
@@ -724,7 +726,8 @@
             dbgs() << '\n');

       // DCE instructions only used to calculate that store.
-      DeleteDeadInstruction(Dead, *MD, &DeadStackObjects);
+      DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(),
+                            &DeadStackObjects);
       ++NumFastStores;
       MadeChange = true;
       continue;
@@ -732,9 +735,10 @@
   }

   // Remove any dead non-memory-mutating instructions.
-  if (isInstructionTriviallyDead(BBI)) {
+  if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) {
     Instruction *Inst = BBI++;
-    DeleteDeadInstruction(Inst, *MD, &DeadStackObjects);
+    DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(),
+                          &DeadStackObjects);
     ++NumFastOther;
     MadeChange = true;
     continue;
@@ -750,7 +754,7 @@
   if (CallSite CS = cast<Value>(BBI)) {
     // Remove allocation function calls from the list of dead stack objects;
     // there can't be any references before the definition.
-    if (isAllocLikeFn(BBI))
+    if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo()))
       DeadStackObjects.remove(BBI);

     // If this call does not access memory, it can't be loading any of our
@@ -771,15 +775,15 @@
         LiveAllocas.push_back(*I);
     }

-    for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
-         E = LiveAllocas.end(); I != E; ++I)
-      DeadStackObjects.remove(*I);
-
     // If all of the allocas were clobbered by the call then we're not going
     // to find anything else to process.
-    if (DeadStackObjects.empty())
+    if (DeadStackObjects.size() == LiveAllocas.size())
       break;

+    for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
+         E = LiveAllocas.end(); I != E; ++I)
+      DeadStackObjects.remove(*I);
+
     continue;
   }
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 9759549..2627113 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -374,7 +374,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
     Instruction *Inst = I++;

     // Dead instructions should just be removed.
-    if (isInstructionTriviallyDead(Inst)) {
+    if (isInstructionTriviallyDead(Inst, TLI)) {
       DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
       Inst->eraseFromParent();
       Changed = true;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 4822fd0..16ae6ad 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -271,16 +271,16 @@ void ValueTable::add(Value *V, uint32_t num) {
   valueNumbering.insert(std::make_pair(V, num));
 }

-uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
+uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
   if (AA->doesNotAccessMemory(C)) {
     Expression exp = create_expression(C);
-    uint32_t& e = expressionNumbering[exp];
+    uint32_t &e = expressionNumbering[exp];
     if (!e) e = nextValueNumber++;
     valueNumbering[C] = e;
     return e;
   } else if (AA->onlyReadsMemory(C)) {
     Expression exp = create_expression(C);
-    uint32_t& e = expressionNumbering[exp];
+    uint32_t &e = expressionNumbering[exp];
     if (!e) {
       e = nextValueNumber++;
       valueNumbering[C] = e;
@@ -413,7 +413,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
     case Instruction::LShr:
     case Instruction::AShr:
     case Instruction::And:
-    case Instruction::Or :
+    case Instruction::Or:
     case Instruction::Xor:
     case Instruction::ICmp:
     case Instruction::FCmp:
@@ -632,6 +632,7 @@
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)

+#ifndef NDEBUG
 void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   errs() << "{\n";
   for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
@@ -641,6 +642,7 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {
   }
   errs() << "}\n";
 }
+#endif

 /// IsValueFullyAvailableInBlock - Return true if we can prove that the value
 /// we're analyzing is fully available in the specified block.
 /// As we go, keep
@@ -1436,7 +1438,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
     Instruction *DepInst = DepInfo.getInst();

     // Loading the allocation -> undef.
-    if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst) ||
+    if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI) ||
         // Loading immediately after lifetime begin -> undef.
         isLifetimeStart(DepInst)) {
       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
@@ -1951,7 +1953,7 @@ bool GVN::processLoad(LoadInst *L) {
   // If this load really doesn't depend on anything, then we must be loading an
   // undef value.  This can happen when loading for a fresh allocation with no
   // intervening stores, for example.
-  if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst)) {
+  if (isa<AllocaInst>(DepInst) || isMallocLikeFn(DepInst, TLI)) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     markInstructionForDeletion(L);
     ++NumGVNLoad;
@@ -2231,12 +2233,20 @@ bool GVN::processInstruction(Instruction *I) {
     Value *SwitchCond = SI->getCondition();
     BasicBlock *Parent = SI->getParent();
     bool Changed = false;
+
+    // Remember how many outgoing edges there are to every successor.
+    SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+    for (unsigned i = 0, n = SI->getNumSuccessors(); i != n; ++i)
+      ++SwitchEdges[SI->getSuccessor(i)];
+
     for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
          i != e; ++i) {
       BasicBlock *Dst = i.getCaseSuccessor();
-      BasicBlockEdge E(Parent, Dst);
-      if (E.isSingleEdge())
+      // If there is only a single edge, propagate the case value into it.
+      if (SwitchEdges.lookup(Dst) == 1) {
+        BasicBlockEdge E(Parent, Dst);
         Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E);
+      }
     }
     return Changed;
   }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 37f8bdf..c933a17 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -68,6 +69,7 @@ namespace {
     ScalarEvolution *SE;
     DominatorTree   *DT;
     TargetData      *TD;
+    TargetLibraryInfo *TLI;

     SmallVector<WeakVH, 16> DeadInsts;
     bool Changed;
@@ -414,11 +416,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
   // new comparison.
   NewCompare->takeName(Compare);
   Compare->replaceAllUsesWith(NewCompare);
-  RecursivelyDeleteTriviallyDeadInstructions(Compare);
+  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

   // Delete the old floating point increment.
   Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
-  RecursivelyDeleteTriviallyDeadInstructions(Incr);
+  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

   // If the FP induction variable still has uses, this is because something else
   // in the loop uses its value.
 // In order to canonicalize the induction
@@ -431,7 +433,7 @@
     Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                  PN->getParent()->getFirstInsertionPt());
     PN->replaceAllUsesWith(Conv);
-    RecursivelyDeleteTriviallyDeadInstructions(PN);
+    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
   }
   Changed = true;
 }
@@ -550,14 +552,14 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
         PN->setIncomingValue(i, ExitVal);

         // If this instruction is dead now, delete it.
-        RecursivelyDeleteTriviallyDeadInstructions(Inst);
+        RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

         if (NumPreds == 1) {
           // Completely replace a single-pred PHI. This is safe, because the
           // NewVal won't be variant in the loop, so we don't need an LCSSA phi
           // node anymore.
           PN->replaceAllUsesWith(ExitVal);
-          RecursivelyDeleteTriviallyDeadInstructions(PN);
+          RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
         }
       }
       if (NumPreds != 1) {
@@ -1697,6 +1699,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   SE = &getAnalysis<ScalarEvolution>();
   DT = &getAnalysis<DominatorTree>();
   TD = getAnalysisIfAvailable<TargetData>();
+  TLI = getAnalysisIfAvailable<TargetLibraryInfo>();

   DeadInsts.clear();
   Changed = false;
@@ -1763,7 +1766,7 @@
   while (!DeadInsts.empty())
     if (Instruction *Inst =
           dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
-      RecursivelyDeleteTriviallyDeadInstructions(Inst);
+      RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

   // The Rewriter may not be used from this point on.

@@ -1772,7 +1775,7 @@
   SinkUnusedInvariants(L);

   // Clean up dead instructions.
-  Changed |= DeleteDeadPHIs(L->getHeader());
+  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

   // Check a post-condition.
   assert(L->isLCSSAForm(*DT) &&
          "Indvars did not leave the loop in lcssa form!");
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index dd42c59..20844c6 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1455,7 +1455,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
   // At this point, the IR is fully up to date and consistent.  Do a quick scan
   // over the new instructions and zap any that are constants or dead.  This
   // frequently happens because of phi translation.
-  SimplifyInstructionsInBlock(NewBB, TD);
+  SimplifyInstructionsInBlock(NewBB, TD, TLI);

   // Threaded an edge!
   ++NumThreads;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 0192e92..99bedce 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -108,6 +108,9 @@ namespace {
     BasicBlock *Preheader;      // The preheader block of the current loop...
     Loop *CurLoop;              // The current loop we are working on...
    AliasSetTracker *CurAST;    // AliasSet information for the current loop...
+    bool MayThrow;              // The current loop contains an instruction which
+                                // may throw, thus preventing code motion of
+                                // instructions with side effects.
     DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;

     /// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
@@ -240,6 +243,15 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
       CurAST->add(*BB);                 // Incorporate the specified basic block
   }

+  MayThrow = false;
+  // TODO: We've already searched for instructions which may throw in subloops.
+  // We may want to reuse this information.
+  for (Loop::block_iterator BB = L->block_begin(), BBE = L->block_end();
+       (BB != BBE) && !MayThrow ; ++BB)
+    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end();
+         (I != E) && !MayThrow; ++I)
+      MayThrow |= I->mayThrow();
+
   // We want to visit all of the instructions in this loop... that are not parts
   // of our subloops (they have already had their invariants hoisted out of
   // their loop, into this loop, so there is no need to process the BODIES of
@@ -307,7 +319,7 @@ void LICM::SinkRegion(DomTreeNode *N) {

     // If the instruction is dead, we would try to sink it because it isn't used
     // in the loop, instead, just delete it.
-    if (isInstructionTriviallyDead(&I)) {
+    if (isInstructionTriviallyDead(&I, TLI)) {
       DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
       ++II;
       CurAST->deleteValue(&I);
@@ -418,17 +430,22 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
       if (!FoundMod) return true;
     }

-    // FIXME: This should use mod/ref information to see if we can hoist or sink
-    // the call.
+    // FIXME: This should use mod/ref information to see if we can hoist or
+    // sink the call.

     return false;
   }

-  // Otherwise these instructions are hoistable/sinkable
-  return isa<BinaryOperator>(I) || isa<CastInst>(I) ||
-         isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
-         isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
-         isa<ShuffleVectorInst>(I);
+  // Only these instructions are hoistable/sinkable.
+  bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) ||
+                        isa<SelectInst>(I) || isa<GetElementPtrInst>(I) ||
+                        isa<CmpInst>(I) || isa<InsertElementInst>(I) ||
+                        isa<ExtractElementInst>(I) ||
+                        isa<ShuffleVectorInst>(I));
+  if (!HoistableKind)
+    return false;
+
+  return isSafeToExecuteUnconditionally(I);
 }

 /// isNotUsedInLoop - Return true if the only users of this instruction are
@@ -604,6 +621,12 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
 }

 bool LICM::isGuaranteedToExecute(Instruction &Inst) {
+
+  // Somewhere in this loop there is an instruction which may throw and make us
+  // exit the loop.
+  if (MayThrow)
+    return false;
+
   // Otherwise we have to check to make sure that the instruction dominates all
   // of the exit blocks.  If it doesn't, then there is a path out of the loop
   // which does not execute this instruction, so we can't hoist it.
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index ac1082c..a72e288 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -132,7 +132,8 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }
 /// and zero out all the operands of this instruction.  If any of them become
 /// dead, delete them and the computation tree that feeds them.
 ///
-static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
+static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
+                                  const TargetLibraryInfo *TLI) {
   SmallVector<Instruction*, 32> NowDeadInsts;

   NowDeadInsts.push_back(I);
@@ -153,7 +154,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
       if (!Op->use_empty()) continue;

       if (Instruction *OpI = dyn_cast<Instruction>(Op))
-        if (isInstructionTriviallyDead(OpI))
+        if (isInstructionTriviallyDead(OpI, TLI))
           NowDeadInsts.push_back(OpI);
     }
@@ -164,10 +165,11 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {

 /// deleteIfDeadInstruction - If the specified value is a dead instruction,
 /// delete it and any recursively used instructions.
-static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
+static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
+                                    const TargetLibraryInfo *TLI) {
   if (Instruction *I = dyn_cast<Instruction>(V))
-    if (isInstructionTriviallyDead(I))
-      deleteDeadInstruction(I, SE);
+    if (isInstructionTriviallyDead(I, TLI))
+      deleteDeadInstruction(I, SE, TLI);
 }

 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -490,7 +492,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                             StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
-    deleteIfDeadInstruction(BasePtr, *SE);
+    deleteIfDeadInstruction(BasePtr, *SE, TLI);
     return false;
   }
@@ -538,7 +540,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,

   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
-  deleteDeadInstruction(TheStore, *SE);
+  deleteDeadInstruction(TheStore, *SE, TLI);
   ++NumMemSet;
   return true;
 }
@@ -579,7 +581,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                             getAnalysis<AliasAnalysis>(), SI)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
-    deleteIfDeadInstruction(StoreBasePtr, *SE);
+    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
     return false;
   }
@@ -594,8 +596,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                             StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
-    deleteIfDeadInstruction(LoadBasePtr, *SE);
-    deleteIfDeadInstruction(StoreBasePtr, *SE);
+    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
+    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
     return false;
   }
@@ -628,7 +630,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,

   // Okay, the memset has been formed.  Zap the original store and anything that
   // feeds into it.
- deleteDeadInstruction(SI, *SE); + deleteDeadInstruction(SI, *SE, TLI); ++NumMemCpy; return true; } diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 982400c..f5daa7b 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -120,7 +120,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumSimplified; } } - LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I); + LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); if (IsSubloopHeader && !isa<PHINode>(I)) break; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7eeb152..abe07aa 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -24,6 +24,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -256,6 +257,7 @@ bool LoopRotate::rotateLoop(Loop *L) { return false; BasicBlock *OrigHeader = L->getHeader(); + BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); if (BI == 0 || BI->isUnconditional()) @@ -267,13 +269,9 @@ bool LoopRotate::rotateLoop(Loop *L) { if (!L->isLoopExiting(OrigHeader)) return false; - // Updating PHInodes in loops with multiple exits adds complexity. - // Keep it simple, and restrict loop rotation to loops with one exit only. - // In future, lift this restriction and support for multiple exits if - // required. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() > 1) + // If the loop latch already contains a branch that leaves the loop then the + // loop is already rotated. + if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) return false; // Check size of original header and reject loop if it is very big. @@ -286,11 +284,10 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); - BasicBlock *OrigLatch = L->getLoopLatch(); // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0 || OrigLatch == 0) + if (OrigPreheader == 0) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes @@ -298,6 +295,8 @@ bool LoopRotate::rotateLoop(Loop *L) { if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) SE->forgetLoop(L); + DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the // loop. Otherwise loop is not suitable for rotation. @@ -408,10 +407,19 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update DominatorTree to reflect the CFG change we just made. Then split // edges as necessary to preserve LoopSimplify form. if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { - // Since OrigPreheader now has the conditional branch to Exit block, it is - // the dominator of Exit. - DT->changeImmediateDominator(Exit, OrigPreheader); - DT->changeImmediateDominator(NewHeader, OrigPreheader); + // Everything that was dominated by the old loop header is now dominated + // by the original loop preheader. 
Conceptually the header was merged + // into the preheader, even though we reuse the actual block as a new + // loop latch. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader); + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) + DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); + + assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode); + assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode); // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(OrigHeader, OrigLatch); @@ -440,6 +448,35 @@ bool LoopRotate::rotateLoop(Loop *L) { // Update OrigHeader to be dominated by the new header block. DT->changeImmediateDominator(NewHeader, OrigPreheader); DT->changeImmediateDominator(OrigHeader, OrigLatch); + + // Brute force incremental dominator tree update. Call + // findNearestCommonDominator on all CFG predecessors of each child of the + // original header. + DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); + SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), + OrigHeaderNode->end()); + bool Changed; + do { + Changed = false; + for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) { + DomTreeNode *Node = HeaderChildren[I]; + BasicBlock *BB = Node->getBlock(); + + pred_iterator PI = pred_begin(BB); + BasicBlock *NearestDom = *PI; + for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) + NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + + // Remember if this changes the DomTree. + if (Node->getIDom()->getBlock() != NearestDom) { + DT->changeImmediateDominator(BB, NearestDom); + Changed = true; + } + } + + // If the dominator changed, this may have an effect on other + // predecessors, continue until we reach a fixpoint. + } while (Changed); } } @@ -452,6 +489,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // emitted code isn't too gross in this common case. MergeBlockIntoPredecessor(OrigHeader, this); + DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + ++NumRotated; return true; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 0ae7a51..d7495da 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -121,9 +121,11 @@ void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } +#ifndef NDEBUG void RegSortData::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -414,9 +416,11 @@ void Formula::print(raw_ostream &OS) const { } } +#ifndef NDEBUG void Formula::dump() const { print(errs()); errs() << '\n'; } +#endif /// isAddRecSExtable - Return true if the given addrec can be sign-extended /// without changing its value. 
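The brute-force dominator update in the LoopRotation hunk above is easier to see in isolation. The following self-contained toy, standard library only and with a made-up diamond CFG, applies the same rule: a node's new immediate dominator is the nearest common dominator of all of its CFG predecessors, recomputed until a fixpoint is reached.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

typedef std::string Block;
typedef std::map<Block, Block> IdomMap; // block -> immediate dominator

static int depth(const IdomMap &Idom, Block B) {
  int D = 0;
  for (IdomMap::const_iterator I = Idom.find(B); I != Idom.end();
       I = Idom.find(I->second))
    ++D;
  return D;
}

// Walk the deeper node up the tree until the two paths meet.
static Block nearestCommonDominator(const IdomMap &Idom, Block A, Block B) {
  int DA = depth(Idom, A), DB = depth(Idom, B);
  while (DA > DB) { A = Idom.find(A)->second; --DA; }
  while (DB > DA) { B = Idom.find(B)->second; --DB; }
  while (A != B) {
    A = Idom.find(A)->second;
    B = Idom.find(B)->second;
  }
  return A;
}

int main() {
  // Diamond CFG: A branches to B and C, and both fall through to D.
  std::map<Block, std::vector<Block> > Preds;
  Preds["D"].push_back("B");
  Preds["D"].push_back("C");

  // Stale tree: D still claims B as its idom, as if the C->D edge were new.
  IdomMap Idom;
  Idom["B"] = "A"; Idom["C"] = "A"; Idom["D"] = "B";

  std::vector<Block> Affected(1, "D");
  bool Changed;
  do {
    Changed = false;
    for (size_t i = 0; i != Affected.size(); ++i) {
      const std::vector<Block> &P = Preds[Affected[i]];
      Block NearestDom = P[0];
      for (size_t j = 1; j != P.size(); ++j)
        NearestDom = nearestCommonDominator(Idom, NearestDom, P[j]);
      if (Idom[Affected[i]] != NearestDom) { // same update rule as the pass
        Idom[Affected[i]] = NearestDom;
        Changed = true;
      }
    }
  } while (Changed);

  printf("idom(D) = %s\n", Idom["D"].c_str()); // prints: idom(D) = A
}

One pass suffices for this diamond; the do/while matters in the pass because correcting one node's idom can change the answer nearestCommonDominator gives for a sibling visited earlier, so iteration continues until nothing moves.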
@@ -974,9 +978,11 @@ void Cost::print(raw_ostream &OS) const { OS << ", plus " << SetupCost << " setup cost"; } +#ifndef NDEBUG void Cost::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1060,9 +1066,11 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", Offset=" << Offset; } +#ifndef NDEBUG void LSRFixup::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { @@ -1252,9 +1260,11 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", widest fixup type: " << *WidestFixupType; } +#ifndef NDEBUG void LSRUse::dump() const { print(errs()); errs() << '\n'; } +#endif /// isLegalUse - Test whether the use described by AM is "legal", meaning it can /// be completely folded into the user instruction at isel time. This includes @@ -3436,9 +3446,11 @@ void WorkItem::print(raw_ostream &OS) const { << " , add offset " << Imm; } +#ifndef NDEBUG void WorkItem::dump() const { print(errs()); errs() << '\n'; } +#endif /// GenerateCrossUseConstantOffsets - Look for registers which are a constant /// distance apart and try to form reuse opportunities between them. @@ -4731,9 +4743,11 @@ void LSRInstance::print(raw_ostream &OS) const { print_uses(OS); } +#ifndef NDEBUG void LSRInstance::dump() const { print(errs()); errs() << '\n'; } +#endif namespace { diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp index 3222f20..dce8e8b 100644 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ b/lib/Transforms/Scalar/ObjCARC.cpp @@ -1236,16 +1236,19 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { // An ObjC-Identified object can't alias a load if it is never locally stored. if (AIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(B)) + return isStoredObjCPointer(A); if (BIsIdentified) { - // If both pointers have provenance, they can be directly compared. - if (A != B) - return false; - } else { - if (isa<LoadInst>(B)) - return isStoredObjCPointer(A); + // Check for an obvious escape. + if (isa<LoadInst>(A)) + return isStoredObjCPointer(B); + // Both pointers are identified and escapes aren't an evident problem. + return false; } - } else { - if (BIsIdentified && isa<LoadInst>(A)) + } else if (BIsIdentified) { + // Check for an obvious escape. + if (isa<LoadInst>(A)) return isStoredObjCPointer(B); } @@ -1381,9 +1384,6 @@ namespace { /// PtrState - This class summarizes several per-pointer runtime properties /// which are propogated through the flow graph. class PtrState { - /// NestCount - The known minimum level of retain+release nesting. - unsigned NestCount; - /// KnownPositiveRefCount - True if the reference count is known to /// be incremented. bool KnownPositiveRefCount; @@ -1401,7 +1401,7 @@ namespace { /// TODO: Encapsulate this better. 
RRInfo RRI; - PtrState() : NestCount(0), KnownPositiveRefCount(false), Partial(false), + PtrState() : KnownPositiveRefCount(false), Partial(false), Seq(S_None) {} void SetKnownPositiveRefCount() { @@ -1416,18 +1416,6 @@ namespace { return KnownPositiveRefCount; } - void IncrementNestCount() { - if (NestCount != UINT_MAX) ++NestCount; - } - - void DecrementNestCount() { - if (NestCount != 0) --NestCount; - } - - bool IsKnownNested() const { - return NestCount > 0; - } - void SetSeq(Sequence NewSeq) { Seq = NewSeq; } @@ -1454,7 +1442,6 @@ void PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - NestCount = std::min(NestCount, Other.NestCount); // We can't merge a plain objc_retain with an objc_retainBlock. if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) @@ -1868,6 +1855,26 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { return AutoreleaseCallee; } +/// IsPotentialUse - Test whether the given value is possible a +/// reference-counted pointer, including tests which utilize AliasAnalysis. +static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { + // First make the rudimentary check. + if (!IsPotentialUse(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst. + return true; +} + /// CanAlterRefCount - Test whether the given instruction can result in a /// reference count modification (positive or negative) for the pointer's /// object. @@ -1894,7 +1901,7 @@ CanAlterRefCount(const Instruction *Inst, const Value *Ptr, for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) { const Value *Op = *I; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -1919,14 +1926,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, // Comparing a pointer with null, or any other constant, isn't really a use, // because we don't care what the pointer points to, or about the values // of any other dynamic reference-counted pointers. - if (!IsPotentialUse(ICI->getOperand(1))) + if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA())) return false; } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { // For calls, just check the arguments (and not the callee operand). for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), OE = CS.arg_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -1936,14 +1943,14 @@ CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); // If we can't tell what the underlying object was, assume there is a // dependence. - return IsPotentialUse(Op) && PA.related(Op, Ptr); + return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr); } // Check each operand for a match. 
for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); OI != OE; ++OI) { const Value *Op = *OI; - if (IsPotentialUse(Op) && PA.related(Ptr, Op)) + if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) return true; } return false; @@ -2612,11 +2619,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented(); + S.RRI.KnownSafe = S.IsKnownIncremented(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); - S.IncrementNestCount(); + S.SetKnownPositiveRefCount(); break; } case IC_RetainBlock: @@ -2631,7 +2638,6 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.SetKnownPositiveRefCount(); - S.DecrementNestCount(); switch (S.GetSeq()) { case S_Stop: @@ -2747,8 +2753,9 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Merge the states from each successor to compute the initial state // for the current block. - for (BBState::edge_iterator SI(MyStates.succ_begin()), - SE(MyStates.succ_end()); SI != SE; ++SI) { + BBState::edge_iterator SI(MyStates.succ_begin()), + SE(MyStates.succ_end()); + if (SI != SE) { const BasicBlock *Succ = *SI; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); assert(I != BBStates.end()); @@ -2760,7 +2767,6 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, assert(I != BBStates.end()); MyStates.MergeSucc(I->second); } - break; } // Visit all the instructions, bottom-up. @@ -2823,12 +2829,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, S.ResetSequenceProgress(S_Retain); S.RRI.IsRetainBlock = Class == IC_RetainBlock; - // Don't check S.IsKnownIncremented() here because it's not sufficient. - S.RRI.KnownSafe = S.IsKnownNested(); + S.RRI.KnownSafe = S.IsKnownIncremented(); S.RRI.Calls.insert(Inst); } - S.IncrementNestCount(); + S.SetKnownPositiveRefCount(); // A retain can be a potential use; procede to the generic checking // code below. @@ -2838,7 +2843,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); - S.DecrementNestCount(); + S.ClearRefCount(); switch (S.GetSeq()) { case S_Retain: @@ -2935,8 +2940,9 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, // Merge the states from each predecessor to compute the initial state // for the current block. - for (BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); PI != PE; ++PI) { + BBState::edge_iterator PI(MyStates.pred_begin()), + PE(MyStates.pred_end()); + if (PI != PE) { const BasicBlock *Pred = *PI; DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); assert(I != BBStates.end()); @@ -2948,7 +2954,6 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, assert(I != BBStates.end()); MyStates.MergePred(I->second); } - break; } // Visit all the instructions, top-down. 
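The new two-argument IsPotentialUse above layers AliasAnalysis on top of the original syntactic test. Condensed into one place, with SyntacticIsPotentialUse standing in for the file-local one-argument overload, this is a restatement of the patch rather than new functionality:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Instructions.h"
using namespace llvm;

static bool isPotentialUseSketch(const Value *Op, AliasAnalysis &AA,
                                 bool (*SyntacticIsPotentialUse)(const Value*)) {
  if (!SyntacticIsPotentialUse(Op))
    return false;                       // cheap, IR-only filter first
  if (AA.pointsToConstantMemory(Op))
    return false;                       // constant objects are never refcounted
  if (const LoadInst *LI = dyn_cast<LoadInst>(Op))
    if (AA.pointsToConstantMemory(LI->getPointerOperand()))
      return false;                     // loaded from constant memory
  return true;                          // otherwise assume the worst
}

The payoff is in CanAlterRefCount and CanUse: arguments that provably live in constant memory no longer count as potential uses, so fewer retain/release pairs are pessimistically kept alive.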
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index d13e4ab..6d27db1 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -59,9 +59,9 @@ FunctionPass *llvm::createCFGSimplificationPass() { return new CFGSimplifyPass(); } -/// ChangeToUnreachable - Insert an unreachable instruction before the specified +/// changeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { +static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -87,8 +87,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { } } -/// ChangeToCall - Convert the specified invoke into a normal call. -static void ChangeToCall(InvokeInst *II) { +/// changeToCall - Convert the specified invoke into a normal call. +static void changeToCall(InvokeInst *II) { SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); NewCall->takeName(II); @@ -105,7 +105,7 @@ static void ChangeToCall(InvokeInst *II) { II->eraseFromParent(); } -static bool MarkAliveBlocks(BasicBlock *BB, +static bool markAliveBlocks(BasicBlock *BB, SmallPtrSet<BasicBlock*, 128> &Reachable) { SmallVector<BasicBlock*, 128> Worklist; @@ -129,7 +129,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, ++BBI; if (!isa<UnreachableInst>(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. - ChangeToUnreachable(BBI, false); + changeToUnreachable(BBI, false); Changed = true; } break; @@ -148,7 +148,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa<UndefValue>(Ptr) || (isa<ConstantPointerNull>(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI, true); + changeToUnreachable(SI, true); Changed = true; break; } @@ -159,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { Value *Callee = II->getCalledValue(); if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { - ChangeToUnreachable(II, true); + changeToUnreachable(II, true); Changed = true; } else if (II->doesNotThrow()) { if (II->use_empty() && II->onlyReadsMemory()) { @@ -168,7 +168,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, II->getUnwindDest()->removePredecessor(II->getParent()); II->eraseFromParent(); } else - ChangeToCall(II); + changeToCall(II); Changed = true; } } @@ -180,12 +180,12 @@ static bool MarkAliveBlocks(BasicBlock *BB, return Changed; } -/// RemoveUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. -static bool RemoveUnreachableBlocksFromFn(Function &F) { +static bool removeUnreachableBlocksFromFn(Function &F) { SmallPtrSet<BasicBlock*, 128> Reachable; - bool Changed = MarkAliveBlocks(F.begin(), Reachable); + bool Changed = markAliveBlocks(F.begin(), Reachable); // If there are unreachable blocks in the CFG... 
if (Reachable.size() == F.size()) @@ -215,9 +215,9 @@ static bool RemoveUnreachableBlocksFromFn(Function &F) { return true; } -/// MergeEmptyReturnBlocks - If we have more than one empty (other than phi +/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi /// node) return blocks, merge them together to promote recursive block merging. -static bool MergeEmptyReturnBlocks(Function &F) { +static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; BasicBlock *RetBlock = 0; @@ -291,9 +291,9 @@ static bool MergeEmptyReturnBlocks(Function &F) { return Changed; } -/// IterativeSimplifyCFG - Call SimplifyCFG on all the blocks in the function, +/// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { +static bool iterativelySimplifyCFG(Function &F, const TargetData *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -317,24 +317,24 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) { // bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetData *TD = getAnalysisIfAvailable<TargetData>(); - bool EverChanged = RemoveUnreachableBlocksFromFn(F); - EverChanged |= MergeEmptyReturnBlocks(F); - EverChanged |= IterativeSimplifyCFG(F, TD); + bool EverChanged = removeUnreachableBlocksFromFn(F); + EverChanged |= mergeEmptyReturnBlocks(F); + EverChanged |= iterativelySimplifyCFG(F, TD); // If neither pass changed anything, we're done. if (!EverChanged) return false; - // IterativeSimplifyCFG can (rarely) make some loops dead. If this happens, - // RemoveUnreachableBlocksFromFn is needed to nuke them, which means we should + // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, + // removeUnreachableBlocksFromFn is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to - // avoid reruning IterativeSimplifyCFG if the second pass of - // RemoveUnreachableBlocksFromFn doesn't do anything. - if (!RemoveUnreachableBlocksFromFn(F)) + // avoid reruning iterativelySimplifyCFG if the second pass of + // removeUnreachableBlocksFromFn doesn't do anything. 
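Distilled, the resulting runOnFunction iterates like the sketch below, where simplify stands in for mergeEmptyReturnBlocks plus iterativelySimplifyCFG and removeDead stands in for removeUnreachableBlocksFromFn; these are hypothetical stand-ins showing only the iteration scheme, not the pass itself:

static bool runDriverSketch(bool (*simplify)(), bool (*removeDead)()) {
  bool EverChanged = removeDead();
  EverChanged |= simplify();
  if (!EverChanged)
    return false;      // nothing changed at all, stop immediately
  // Only keep iterating while dead-block removal actually exposes new
  // simplification opportunities.
  if (!removeDead())
    return true;
  bool Changed;
  do {
    Changed = simplify();
    Changed |= removeDead();
  } while (Changed);
  return true;
}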
+ if (!removeUnreachableBlocksFromFn(F)) return true; do { - EverChanged = IterativeSimplifyCFG(F, TD); - EverChanged |= RemoveUnreachableBlocksFromFn(F); + EverChanged = iterativelySimplifyCFG(F, TD); + EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); return true; diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 3904419..65311fe 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" @@ -38,6 +39,10 @@ using namespace llvm; STATISTIC(NumSimplified, "Number of library calls simplified"); STATISTIC(NumAnnotated, "Number of attributes added to library functions"); +static cl::opt<bool> UnsafeFPShrink("enable-double-float-shrink", cl::Hidden, + cl::init(false), + cl::desc("Enable unsafe double to float " + "shrinking for math lib calls")); //===----------------------------------------------------------------------===// // Optimizer Base Class //===----------------------------------------------------------------------===// @@ -893,16 +898,56 @@ struct MemSetOpt : public LibCallOptimization { //===----------------------------------------------------------------------===// //===---------------------------------------===// -// 'cos*' Optimizations +// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' + +struct UnaryDoubleFPOpt : public LibCallOptimization { + bool CheckRetType; + UnaryDoubleFPOpt(bool CheckReturnType): CheckRetType(CheckReturnType) {} + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || + !FT->getParamType(0)->isDoubleTy()) + return 0; + + if (CheckRetType) { + // Check if all the uses for function like 'sin' are converted to float. + for (Value::use_iterator UseI = CI->use_begin(); UseI != CI->use_end(); + ++UseI) { + FPTruncInst *Cast = dyn_cast<FPTruncInst>(*UseI); + if (Cast == 0 || !Cast->getType()->isFloatTy()) + return 0; + } + } + + // If this is something like 'floor((double)floatval)', convert to floorf. + FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); + if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) + return 0; + + // floor((double)floatval) -> (double)floorf(floatval) + Value *V = Cast->getOperand(0); + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + return B.CreateFPExt(V, B.getDoubleTy()); + } +}; +//===---------------------------------------===// +// 'cos*' Optimizations struct CosOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "cos" && + TLI->has(LibFunc::cosf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. 
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; // cos(-x) -> cos(x) Value *Op1 = CI->getArgOperand(0); @@ -910,7 +955,7 @@ struct CosOpt : public LibCallOptimization { BinaryOperator *BinExpr = cast<BinaryOperator>(Op1); return B.CreateCall(Callee, BinExpr->getOperand(1), "cos"); } - return 0; + return Ret; } }; @@ -919,13 +964,20 @@ struct CosOpt : public LibCallOptimization { struct PowOpt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "pow" && + TLI->has(LibFunc::powf)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) { @@ -936,7 +988,7 @@ struct PowOpt : public LibCallOptimization { } ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2); - if (Op2C == 0) return 0; + if (Op2C == 0) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -974,12 +1026,19 @@ struct PowOpt : public LibCallOptimization { struct Exp2Opt : public LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + Value *Ret = NULL; + if (UnsafeFPShrink && Callee->getName() == "exp2" && + TLI->has(LibFunc::exp2)) { + UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); + Ret = UnsafeUnaryDoubleFP.CallOptimizer(Callee, CI, B); + } + FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the // result type. if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return Ret; Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 @@ -1016,29 +1075,7 @@ struct Exp2Opt : public LibCallOptimization { return CI; } - return 0; - } -}; - -//===---------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' - -struct UnaryDoubleFPOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) - return 0; - - // If this is something like 'floor((double)floatval)', convert to floorf. 
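For reference, the effect of the new UnsafeFPShrink path expressed at source level; the pass works on IR rather than C, and the float result is not bit-identical to truncating the double result, which is why the flag is spelled "unsafe":

#include <math.h>

float before(float x) {
  return (float)cos((double)x); // fpext, call the double cos, fptrunc
}

float after(float x) {
  return cosf(x);               // shrunk: float in, float out, no conversions
}

The CheckRetType constructor argument encodes the precondition: for a call such as cos the rewrite is only attempted when every use of the result is an fptrunc back to float, so no caller can observe the extra double precision being dropped.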
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; - - // floor((double)floatval) -> (double)floorf(floatval) - Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); - return B.CreateFPExt(V, B.getDoubleTy()); + return Ret; } }; @@ -1534,7 +1571,8 @@ namespace { StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr; MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet; // Math Library Optimizations - CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP; + CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; + UnaryDoubleFPOpt UnaryDoubleFP, UnsafeUnaryDoubleFP; // Integer Optimizations FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii; ToAsciiOpt ToAscii; @@ -1547,10 +1585,13 @@ namespace { public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true), - StpCpy(false), StpCpyChk(true) { + StpCpy(false), StpCpyChk(true), + UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) { initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } void AddOpt(LibFunc::Func F, LibCallOptimization* Opt); + void AddOpt(LibFunc::Func F1, LibFunc::Func F2, LibCallOptimization* Opt); + void InitOptimizations(); bool runOnFunction(Function &F); @@ -1586,6 +1627,12 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) { Optimizations[TLI->getName(F)] = Opt; } +void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2, + LibCallOptimization* Opt) { + if (TLI->has(F1) && TLI->has(F2)) + Optimizations[TLI->getName(F1)] = Opt; +} + /// Optimizations - Populate the Optimizations map with all the optimizations /// we know. 
void SimplifyLibCalls::InitOptimizations() { @@ -1641,20 +1688,37 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["llvm.exp2.f64"] = &Exp2; Optimizations["llvm.exp2.f32"] = &Exp2; - if (TLI->has(LibFunc::fabs) && TLI->has(LibFunc::fabsf)) - Optimizations["fabs"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::floor) && TLI->has(LibFunc::floorf)) - Optimizations["floor"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::ceil) && TLI->has(LibFunc::ceilf)) - Optimizations["ceil"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::round) && TLI->has(LibFunc::roundf)) - Optimizations["round"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::rint) && TLI->has(LibFunc::rintf)) - Optimizations["rint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::nearbyint) && TLI->has(LibFunc::nearbyintf)) - Optimizations["nearbyint"] = &UnaryDoubleFP; - if (TLI->has(LibFunc::trunc) && TLI->has(LibFunc::truncf)) - Optimizations["trunc"] = &UnaryDoubleFP; + AddOpt(LibFunc::ceil, LibFunc::ceilf, &UnaryDoubleFP); + AddOpt(LibFunc::fabs, LibFunc::fabsf, &UnaryDoubleFP); + AddOpt(LibFunc::floor, LibFunc::floorf, &UnaryDoubleFP); + AddOpt(LibFunc::rint, LibFunc::rintf, &UnaryDoubleFP); + AddOpt(LibFunc::round, LibFunc::roundf, &UnaryDoubleFP); + AddOpt(LibFunc::nearbyint, LibFunc::nearbyintf, &UnaryDoubleFP); + AddOpt(LibFunc::trunc, LibFunc::truncf, &UnaryDoubleFP); + + if(UnsafeFPShrink) { + AddOpt(LibFunc::acos, LibFunc::acosf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::acosh, LibFunc::acoshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asin, LibFunc::asinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::asinh, LibFunc::asinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atan, LibFunc::atanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::atanh, LibFunc::atanhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cbrt, LibFunc::cbrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::cosh, LibFunc::coshf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp, LibFunc::expf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::exp10, LibFunc::exp10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::expm1, LibFunc::expm1f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log, LibFunc::logf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log10, LibFunc::log10f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log1p, LibFunc::log1pf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::log2, LibFunc::log2f, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::logb, LibFunc::logbf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sin, LibFunc::sinf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sinh, LibFunc::sinhf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::sqrt, LibFunc::sqrtf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tan, LibFunc::tanf, &UnsafeUnaryDoubleFP); + AddOpt(LibFunc::tanh, LibFunc::tanhf, &UnsafeUnaryDoubleFP); + } // Integer Optimizations Optimizations["ffs"] = &FFS; diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index d831452..1e6586b 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -55,10 +55,12 @@ void ExtAddrMode::print(raw_ostream &OS) const { OS << ']'; } +#ifndef NDEBUG void ExtAddrMode::dump() const { print(dbgs()); dbgs() << '\n'; } +#endif /// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2679b93..75a7817 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -94,7 +94,7 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { /// is dead. 
Also recursively delete any operands that become dead as /// a result. This includes tracing the def-use list from the PHI to see if /// it is ultimately unused or if it reaches an unused cycle. -bool llvm::DeleteDeadPHIs(BasicBlock *BB) { +bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { // Recursively deleting a PHI may cause multiple PHIs to be deleted // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. SmallVector<WeakVH, 8> PHIs; @@ -105,7 +105,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) { bool Changed = false; for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null<PHINode>(PHIs[i].operator Value*())) - Changed |= RecursivelyDeleteDeadPHINode(PN); + Changed |= RecursivelyDeleteDeadPHINode(PN, TLI); return Changed; } diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp new file mode 100644 index 0000000..30d60be --- /dev/null +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -0,0 +1,253 @@ +//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an optimization for div and rem on architectures that +// execute short instructions significantly faster than longer instructions. +// For example, on Intel Atom 32-bit divides are slow enough that during +// runtime it is profitable to check the value of the operands, and if they are +// positive and less than 256 use an unsigned 8-bit divide. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "bypass-slow-division" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/IRBuilder.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" + +using namespace llvm; + +namespace { + struct DivOpInfo { + bool SignedOp; + Value *Dividend; + Value *Divisor; + + DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor) + : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {} + }; + + struct DivPhiNodes { + PHINode *Quotient; + PHINode *Remainder; + + DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder) + : Quotient(InQuotient), Remainder(InRemainder) {} + }; +} + +namespace llvm { + template<> + struct DenseMapInfo<DivOpInfo> { + static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) { + return Val1.SignedOp == Val2.SignedOp && + Val1.Dividend == Val2.Dividend && + Val1.Divisor == Val2.Divisor; + } + + static DivOpInfo getEmptyKey() { + return DivOpInfo(false, 0, 0); + } + + static DivOpInfo getTombstoneKey() { + return DivOpInfo(true, 0, 0); + } + + static unsigned getHashValue(const DivOpInfo &Val) { + return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^ + reinterpret_cast<uintptr_t>(Val.Divisor)) ^ + (unsigned)Val.SignedOp; + } + }; + + typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy; +} + +// insertFastDiv - Substitutes the div/rem instruction with code that checks the +// value of the operands and uses a shorter-faster div/rem instruction when +// possible and the longer-slower div/rem instruction otherwise. 
+static bool insertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + Value *Dividend = Instr->getOperand(0); + Value *Divisor = Instr->getOperand(1); + + if (isa<ConstantInt>(Divisor) || + (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) { + // Operations with immediate values should have + // been solved and replaced during compile time. + return false; + } + + // Basic Block is split before divide + BasicBlock *MainBB = I; + BasicBlock *SuccessorBB = I->splitBasicBlock(J); + ++I; //advance iterator I to successorBB + + // Add new basic block for slow divide operation + BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + SlowBB->moveBefore(SuccessorBB); + IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin()); + Value *SlowQuotientV; + Value *SlowRemainderV; + if (UseSignedOp) { + SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor); + } else { + SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor); + SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor); + } + SlowBuilder.CreateBr(SuccessorBB); + + // Add new basic block for fast divide operation + BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "", + MainBB->getParent(), SuccessorBB); + FastBB->moveBefore(SlowBB); + IRBuilder<> FastBuilder(FastBB, FastBB->begin()); + Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, + BypassType); + Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, + BypassType); + + // udiv/urem because optimization only handles positive numbers + Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV, + ShortDivisorV); + Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV, + ShortDivisorV); + Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt, + ShortQuotientV, + Dividend->getType()); + Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt, + ShortRemainderV, + Dividend->getType()); + FastBuilder.CreateBr(SuccessorBB); + + // Phi nodes for result of div and rem + IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin()); + PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + QuoPhi->addIncoming(SlowQuotientV, SlowBB); + QuoPhi->addIncoming(FastQuotientV, FastBB); + PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2); + RemPhi->addIncoming(SlowRemainderV, SlowBB); + RemPhi->addIncoming(FastRemainderV, FastBB); + + // Replace Instr with appropriate phi node + if (UseDivOp) + Instr->replaceAllUsesWith(QuoPhi); + else + Instr->replaceAllUsesWith(RemPhi); + Instr->eraseFromParent(); + + // Combine operands into a single value with OR for value testing below + MainBB->getInstList().back().eraseFromParent(); + IRBuilder<> MainBuilder(MainBB, MainBB->end()); + Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); + + // BitMask is inverted to check if the operands are + // larger than the bypass type + uint64_t BitMask = ~BypassType->getBitMask(); + Value *AndV = MainBuilder.CreateAnd(OrV, BitMask); + + // Compare operand values and branch + Value *ZeroV = MainBuilder.getInt32(0); + Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV); + MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB); + + // point iterator J at first instruction of successorBB + J = I->begin(); + + // Cache phi nodes to 
be used later in place of other instances + // of div or rem with the same sign, dividend, and divisor + DivOpInfo Key(UseSignedOp, Dividend, Divisor); + DivPhiNodes Value(QuoPhi, RemPhi); + PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value)); + return true; +} + +// reuseOrInsertFastDiv - Reuses previously computed dividend or remainder if +// operands and operation are identical. Otherwise call insertFastDiv to perform +// the optimization and cache the resulting dividend and remainder. +static bool reuseOrInsertFastDiv(Function &F, + Function::iterator &I, + BasicBlock::iterator &J, + IntegerType *BypassType, + bool UseDivOp, + bool UseSignedOp, + DivCacheTy &PerBBDivCache) { + // Get instruction operands + Instruction *Instr = J; + DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); + DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); + + if (CacheI == PerBBDivCache.end()) { + // If previous instance does not exist, insert fast div + return insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, + PerBBDivCache); + } + + // Replace operation value with previously generated phi node + DivPhiNodes &Value = CacheI->second; + if (UseDivOp) { + // Replace all uses of div instruction with quotient phi node + J->replaceAllUsesWith(Value.Quotient); + } else { + // Replace all uses of rem instruction with remainder phi node + J->replaceAllUsesWith(Value.Remainder); + } + + // Advance to next operation + ++J; + + // Remove redundant operation + Instr->eraseFromParent(); + return true; +} + +// bypassSlowDivision - This optimization identifies DIV instructions that can +// be profitably bypassed and carried out with a shorter, faster divide. +bool llvm::bypassSlowDivision(Function &F, + Function::iterator &I, + const DenseMap<Type *, Type *> &BypassTypeMap) { + DivCacheTy DivCache; + + bool MadeChange = false; + for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) { + + // Get instruction details + unsigned Opcode = J->getOpcode(); + bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv; + bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem; + bool UseSignedOp = Opcode == Instruction::SDiv || + Opcode == Instruction::SRem; + + // Only optimize div or rem ops + if (!UseDivOp && !UseRemOp) + continue; + + // Continue if div/rem type is not bypassed + DenseMap<Type *, Type *>::const_iterator BT = + BypassTypeMap.find(J->getType()); + if (BT == BypassTypeMap.end()) + continue; + + IntegerType *BypassType = cast<IntegerType>(BT->second); + MadeChange |= reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, + UseSignedOp, DivCache); + } + + return MadeChange; +} diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 4ff31ca..215a16f 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils BasicBlockUtils.cpp BreakCriticalEdges.cpp BuildLibCalls.cpp + BypassSlowDivision.cpp CloneFunction.cpp CloneModule.cpp CmpInstAnalysis.cpp diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index bed7d72..0601433 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -52,7 +52,8 @@ using namespace llvm; /// Also calls RecursivelyDeleteTriviallyDeadInstructions() on any branch/switch /// conditions and indirectbr addresses this might make dead if /// DeleteDeadConditions is true. 
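Restating what the BypassSlowDivision code above emits, as C for clarity: a hypothetical 8-bit bypass of a 32-bit unsigned divide, matching the Atom example in the file comment. The real transform additionally produces a remainder phi and caches both results:

unsigned bypassedUDiv(unsigned a, unsigned b) {
  // (a | b) & ~0xFF is zero iff both operands fit in the 8-bit bypass type.
  if (((a | b) & ~0xFFu) == 0) {
    unsigned char q = (unsigned char)a / (unsigned char)b; // short, fast divide
    return q;
  }
  return a / b;                                            // full-width divide
}

reuseOrInsertFastDiv then keys the quotient and remainder phi nodes on the (signedness, dividend, divisor) triple in PerBBDivCache, so a later a % b in the same block reuses the compare-and-branch already built for a / b.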
-bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { +bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, + const TargetLibraryInfo *TLI) { TerminatorInst *T = BB->getTerminator(); IRBuilder<> Builder(T); @@ -96,7 +97,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = BI->getCondition(); BI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } return false; @@ -161,7 +162,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Cond = SI->getCondition(); SI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Cond); + RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); return true; } @@ -205,7 +206,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { Value *Address = IBI->getAddress(); IBI->eraseFromParent(); if (DeleteDeadConditions) - RecursivelyDeleteTriviallyDeadInstructions(Address); + RecursivelyDeleteTriviallyDeadInstructions(Address, TLI); // If we didn't find our destination in the IBI successor list, then we // have undefined behavior. Replace the unconditional branch with an @@ -230,7 +231,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) { /// isInstructionTriviallyDead - Return true if the result produced by the /// instruction is not used, and the instruction has no side effects. /// -bool llvm::isInstructionTriviallyDead(Instruction *I) { +bool llvm::isInstructionTriviallyDead(Instruction *I, + const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa<TerminatorInst>(I)) return false; // We don't want the landingpad instruction removed by anything this general. @@ -265,9 +267,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { return isa<UndefValue>(II->getArgOperand(1)); } - if (isAllocLikeFn(I)) return true; + if (isAllocLikeFn(I, TLI)) return true; - if (CallInst *CI = isFreeCall(I)) + if (CallInst *CI = isFreeCall(I, TLI)) if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0))) return C->isNullValue() || isa<UndefValue>(C); @@ -278,9 +280,11 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { /// trivially dead instruction, delete it. If that makes any of its operands /// trivially dead, delete them too, recursively. Return true if any /// instructions were deleted. -bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { +bool +llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, + const TargetLibraryInfo *TLI) { Instruction *I = dyn_cast<Instruction>(V); - if (!I || !I->use_empty() || !isInstructionTriviallyDead(I)) + if (!I || !I->use_empty() || !isInstructionTriviallyDead(I, TLI)) return false; SmallVector<Instruction*, 16> DeadInsts; @@ -301,7 +305,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { // operand, and if it is 'trivially' dead, delete it in a future loop // iteration. if (Instruction *OpI = dyn_cast<Instruction>(OpV)) - if (isInstructionTriviallyDead(OpI)) + if (isInstructionTriviallyDead(OpI, TLI)) DeadInsts.push_back(OpI); } @@ -334,19 +338,20 @@ static bool areAllUsesEqual(Instruction *I) { /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them /// too, recursively. Return true if a change was made. 
-bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { +bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, + const TargetLibraryInfo *TLI) { SmallPtrSet<Instruction*, 4> Visited; for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); I = cast<Instruction>(*I->use_begin())) { if (I->use_empty()) - return RecursivelyDeleteTriviallyDeadInstructions(I); + return RecursivelyDeleteTriviallyDeadInstructions(I, TLI); // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. if (!Visited.insert(I)) { // Break the cycle and delete the instruction and its operands. I->replaceAllUsesWith(UndefValue::get(I->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(I); + (void)RecursivelyDeleteTriviallyDeadInstructions(I, TLI); return true; } } @@ -358,7 +363,8 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. -bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { +bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD, + const TargetLibraryInfo *TLI) { bool MadeChange = false; #ifndef NDEBUG @@ -381,7 +387,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { continue; } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); if (BIHandle != BI) BI = BB->begin(); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 518df7c..32d7fa1 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,6 +22,7 @@ #include "llvm/LLVMContext.h" #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -54,6 +55,7 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -101,14 +103,14 @@ public: /// static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { if (SI1 == SI2) return false; // Can't merge with self! - + // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. 
BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); SmallPtrSet<BasicBlock*, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); - + for (succ_iterator I = succ_begin(SI2BB), E = succ_end(SI2BB); I != E; ++I) if (SI1Succs.count(*I)) for (BasicBlock::iterator BBI = (*I)->begin(); @@ -118,7 +120,7 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) { PN->getIncomingValueForBlock(SI2BB)) return false; } - + return true; } @@ -135,7 +137,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, assert(SI1->isUnconditional() && SI2->isConditional()); // We fold the unconditional branch if we can easily update all PHI nodes in - // common successors: + // common successors: // 1> We have a constant incoming value for the conditional branch; // 2> We have "Cond" as the incoming value for the unconditional branch; // 3> SI2->getCondition() and Cond have same operands. @@ -170,7 +172,7 @@ static bool isProfitableToFoldUnconditional(BranchInst *SI1, static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred) { if (!isa<PHINode>(Succ->begin())) return; // Quick exit if nothing to do - + PHINode *PN; for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I) @@ -222,7 +224,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // doesn't dominate BB. if (Pred2->getSinglePredecessor() == 0) return 0; - + // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". if (Pred1Br->getSuccessor(0) == BB && @@ -252,7 +254,7 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // Otherwise, if this is a conditional branch, then we can use it! BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator()); if (BI == 0) return 0; - + assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { IfTrue = Pred1; @@ -345,7 +347,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. if (AggressiveInsts == 0) return false; - + // If we have seen this instruction before, don't count it again. if (AggressiveInsts->count(I)) return true; @@ -411,7 +413,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, const TargetData *TD, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast<Instruction>(V); if (I == 0) return 0; - + // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (ConstantInt *C = GetConstantInt(I->getOperand(1), TD)) { @@ -420,21 +422,21 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Vals.push_back(C); return I->getOperand(0); } - + // If we have "x ult 3" comparison, for example, then we can add 0,1,2 to // the set. ConstantRange Span = ConstantRange::makeICmpRegion(ICI->getPredicate(), C->getValue()); - + // If this is an and/!= check then we want to optimize "x ugt 2" into // x != 0 && x != 1. if (!isEQ) Span = Span.inverse(); - + // If there are a ton of values, we don't want to make a ginormous switch. 
if (Span.getSetSize().ugt(8) || Span.isEmptySet()) return 0; - + for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; @@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, } return 0; } - + // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) return 0; - + unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; if (Value *LHS = GatherConstantCompares(I->getOperand(0), Vals, Extra, TD, @@ -467,12 +469,12 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, Extra = I->getOperand(1); return LHS; } - + Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; return 0; } - + // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. if (Extra == 0 || Extra == I->getOperand(0)) { @@ -484,7 +486,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra, assert(Vals.size() == NumValsBeforeLHS); Extra = OldExtra; } - + return 0; } @@ -615,6 +617,9 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, assert(ThisVal && "This isn't a value comparison!!"); if (ThisVal != PredVal) return false; // Different predicates. + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Find out information about when control will move from Pred to TI's block. std::vector<ValueEqualityComparisonCase> PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), @@ -634,7 +639,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // can simplify TI. if (!ValuesOverlap(PredCases, ThisCases)) return false; - + if (isa<BranchInst>(TI)) { // Okay, one of the successors of this condbr is dead. Convert it to a // uncond br. @@ -652,7 +657,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, EraseTerminatorInstAndDCECond(TI); return true; } - + SwitchInst *SI = cast<SwitchInst>(TI); // Okay, TI has cases that are statically dead, prune them away. SmallPtrSet<Constant*, 16> DeadCases; @@ -673,7 +678,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } - + // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. ConstantInt *TIV = 0; @@ -729,8 +734,8 @@ namespace { } static int ConstantIntSortPredicate(const void *P1, const void *P2) { - const ConstantInt *LHS = *(const ConstantInt**)P1; - const ConstantInt *RHS = *(const ConstantInt**)P2; + const ConstantInt *LHS = *(const ConstantInt*const*)P1; + const ConstantInt *RHS = *(const ConstantInt*const*)P2; if (LHS->getValue().ult(RHS->getValue())) return 1; if (LHS->getValue() == RHS->getValue()) @@ -738,6 +743,67 @@ static int ConstantIntSortPredicate(const void *P1, const void *P2) { return -1; } +static inline bool HasBranchWeights(const Instruction* I) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) + return MDS->getString().equals("branch_weights"); + + return false; +} + +/// Tries to get a branch weight for the given instruction, returns NULL if it +/// can't. Pos starts at 0. 
+static ConstantInt* GetWeight(Instruction* I, int Pos) { + MDNode* ProfMD = I->getMetadata(LLVMContext::MD_prof); + if (ProfMD && ProfMD->getOperand(0)) { + if (MDString* MDS = dyn_cast<MDString>(ProfMD->getOperand(0))) { + if (MDS->getString().equals("branch_weights")) { + assert(ProfMD->getNumOperands() >= 3); + return dyn_cast<ConstantInt>(ProfMD->getOperand(1 + Pos)); + } + } + } + + return 0; +} + +/// Scale the given weights based on the successor TI's metadata. Scaling is +/// done by multiplying every weight by the sum of the successor's weights. +static void ScaleWeights(Instruction* STI, MutableArrayRef<uint64_t> Weights) { + // Sum the successor's weights + assert(HasBranchWeights(STI)); + unsigned Scale = 0; + MDNode* ProfMD = STI->getMetadata(LLVMContext::MD_prof); + for (unsigned i = 1; i < ProfMD->getNumOperands(); ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(ProfMD->getOperand(i)); + assert(CI); + Scale += CI->getValue().getZExtValue(); + } + + // Skip default, as it's replaced during the folding + for (unsigned i = 1; i < Weights.size(); ++i) { + Weights[i] *= Scale; + } +} + +/// Sees if any of the weights are too big for a uint32_t, and halves all the +/// weights if any are. +static void FitWeights(MutableArrayRef<uint64_t> Weights) { + bool Halve = false; + for (unsigned i = 0; i < Weights.size(); ++i) + if (Weights[i] > UINT_MAX) { + Halve = true; + break; + } + + if (! Halve) + return; + + for (unsigned i = 0; i < Weights.size(); ++i) + Weights[i] /= 2; +} + /// FoldValueComparisonIntoPredecessors - The specified terminator is a value /// equality comparison instruction (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons @@ -770,6 +836,55 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // build. SmallVector<BasicBlock*, 8> NewSuccessors; + // Update the branch weight metadata along the way + SmallVector<uint64_t, 8> Weights; + uint64_t PredDefaultWeight = 0; + bool PredHasWeights = HasBranchWeights(PTI); + bool SuccHasWeights = HasBranchWeights(TI); + + if (PredHasWeights) { + MDNode* MD = PTI->getMetadata(LLVMContext::MD_prof); + assert(MD); + for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { + ConstantInt* CI = dyn_cast<ConstantInt>(MD->getOperand(i)); + assert(CI); + Weights.push_back(CI->getValue().getZExtValue()); + } + + // If the predecessor is a conditional eq, then swap the default weight + // to be the first entry. 
+ if (BranchInst* BI = dyn_cast<BranchInst>(PTI)) { + assert(Weights.size() == 2); + ICmpInst *ICI = cast<ICmpInst>(BI->getCondition()); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) { + std::swap(Weights.front(), Weights.back()); + } + } + + PredDefaultWeight = Weights.front(); + } else if (SuccHasWeights) { + // If there are no predecessor weights but there are successor weights, + // populate Weights with 1, which will later be scaled to the sum of + // successor's weights + Weights.assign(1 + PredCases.size(), 1); + PredDefaultWeight = 1; + } + + uint64_t SuccDefaultWeight = 0; + if (SuccHasWeights) { + int Index = 0; + if (BranchInst* BI = dyn_cast<BranchInst>(TI)) { + ICmpInst* ICI = dyn_cast<ICmpInst>(BI->getCondition()); + assert(ICI); + + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Index = 1; + } + + SuccDefaultWeight = GetWeight(TI, Index)->getValue().getZExtValue(); + } + if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. @@ -780,6 +895,12 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, else { // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); + + if (PredHasWeights) { + std::swap(Weights[i+1], Weights.back()); + Weights.pop_back(); + } + PredCases.pop_back(); --i; --e; } @@ -790,14 +911,34 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, PredDefault = BBDefault; NewSuccessors.push_back(BBDefault); } + + if (SuccHasWeights) { + ScaleWeights(TI, Weights); + Weights.front() *= SuccDefaultWeight; + } else if (PredHasWeights) { + Weights.front() /= (1 + BBCases.size()); + } + for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); NewSuccessors.push_back(BBCases[i].Dest); + if (SuccHasWeights) { + Weights.push_back(PredDefaultWeight * + GetWeight(TI, i)->getValue().getZExtValue()); + } else if (PredHasWeights) { + // Split the old default's weight amongst the children + Weights.push_back(PredDefaultWeight / (1 + BBCases.size())); + } } } else { + // FIXME: preserve branch weight metadata, similarly to the 'then' + // above. For now, drop it. + PredHasWeights = false; + SuccHasWeights = false; + // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. @@ -822,7 +963,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. - for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = + for (std::set<ConstantInt*, ConstantIntOrdering>::iterator I = PTIHandled.begin(), E = PTIHandled.end(); I != E; ++I) { PredCases.push_back(ValueEqualityComparisonCase(*I, BBDefault)); @@ -851,6 +992,17 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, for (unsigned i = 0, e = PredCases.size(); i != e; ++i) NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + if (PredHasWeights || SuccHasWeights) { + // Halve the weights if any of them cannot fit in an uint32_t + FitWeights(Weights); + + SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end()); + + NewSI->setMetadata(LLVMContext::MD_prof, + MDBuilder(BB->getContext()). 
+ createBranchWeights(MDWeights)); + } + EraseTerminatorInstAndDCECond(PTI); // Okay, last check. If BB is still a successor of PTI, then we must @@ -984,11 +1136,11 @@ HoistTerminator: Value *BB1V = PN->getIncomingValueForBlock(BB1); Value *BB2V = PN->getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; - + // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (SI == 0) SI = cast<SelectInst> (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1056,7 +1208,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // Do not hoist the instruction if any of its operands are defined but not // used in this BB. The transformation will prevent the operand from // being sunk into the use block. - for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); + for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end(); i != e; ++i) { Instruction *OpI = dyn_cast<Instruction>(*i); if (OpI && OpI->getParent() == BIParent && @@ -1112,7 +1264,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { // as well. if (PHIs.empty()) return false; - + // If we get here, we can hoist the instruction and if-convert. DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";); @@ -1162,13 +1314,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); unsigned Size = 0; - + for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (isa<DbgInfoIntrinsic>(BBI)) continue; if (Size > 10) return false; // Don't clone large BB's. ++Size; - + // We can only support instructions that do not define values that are // live outside of the current basic block. for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); @@ -1176,7 +1328,7 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { Instruction *U = cast<Instruction>(*UI); if (U->getParent() != BB || isa<PHINode>(U)) return false; } - + // Looks ok, continue checking. } @@ -1194,31 +1346,31 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // outside of the block. if (!PN || PN->getParent() != BB || !PN->hasOneUse()) return false; - + // Degenerate case of a single entry PHI. if (PN->getNumIncomingValues() == 1) { FoldSingleEntryPHINodes(PN->getParent()); - return true; + return true; } // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; - + // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i)); if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; - + // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); - + if (RealDest == BB) continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. if (isa<IndirectBrInst>(PredBB->getTerminator())) continue; - + // The dest block might have PHI nodes, other predecessors and other // difficult cases. 
Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting @@ -1227,7 +1379,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { RealDest->getName()+".critedge", RealDest->getParent(), RealDest); BranchInst::Create(RealDest, EdgeBB); - + // Update PHI nodes. AddPredecessorToBlock(RealDest, EdgeBB, BB); @@ -1244,7 +1396,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { // Clone the instruction. Instruction *N = BBI->clone(); if (BBI->hasName()) N->setName(BBI->getName()+".c"); - + // Update operands due to translation. for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) { @@ -1252,7 +1404,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const TargetData *TD) { if (PI != TranslateMap.end()) *i = PI->second; } - + // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, TD)) { TranslateMap[BBI] = V; @@ -1297,7 +1449,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // Don't bother if the branch will be constant folded trivially. isa<ConstantInt>(IfCond)) return false; - + // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target @@ -1307,14 +1459,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I) if (NumPhis > 2) return false; - + // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet<Instruction*, 4> AggressiveInsts; unsigned MaxCostVal0 = PHINodeFoldingThreshold, MaxCostVal1 = PHINodeFoldingThreshold; - + for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) { PHINode *PN = cast<PHINode>(II++); if (Value *V = SimplifyInstruction(PN, TD)) { @@ -1322,19 +1474,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { PN->eraseFromParent(); continue; } - + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, MaxCostVal0) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, MaxCostVal1)) return false; } - + // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast<PHINode>(BB->begin()); if (PN == 0) return true; - + // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. if (PN->getType()->isIntegerTy(1) && @@ -1342,7 +1494,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { isa<BinaryOperator>(PN->getIncomingValue(1)) || isa<BinaryOperator>(IfCond))) return false; - + // If all PHI nodes are promotable, check to make sure that all // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not @@ -1362,7 +1514,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) { IfBlock2 = 0; } else { @@ -1375,15 +1527,15 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { return false; } } - + DEBUG(dbgs() << "FOUND IF CONDITION! 
" << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); - + // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. Instruction *InsertPt = DomBlock->getTerminator(); IRBuilder<true, NoFolder> Builder(InsertPt); - + // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) @@ -1394,19 +1546,19 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { DomBlock->getInstList().splice(InsertPt, IfBlock2->getInstList(), IfBlock2->begin(), IfBlock2->getTerminator()); - + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse); Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue); - - SelectInst *NV = + + SelectInst *NV = cast<SelectInst>(Builder.CreateSelect(IfCond, TrueVal, FalseVal, "")); PN->replaceAllUsesWith(NV); NV->takeName(PN); PN->eraseFromParent(); } - + // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. @@ -1420,14 +1572,14 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { /// SimplifyCondBranchToTwoReturns - If we found a conditional branch that goes /// to two returning blocks, try to merge them together into one return, /// introducing a select if the return values disagree. -static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, +static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator()); - + // Check to ensure both blocks are empty (just a return) or optionally empty // with PHI nodes. If there are other instructions, merging would cause extra // computation on one path or the other. @@ -1447,12 +1599,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, EraseTerminatorInstAndDCECond(BI); return true; } - + // Otherwise, figure out what the true and false return values are // so we can insert a new select instruction. Value *TrueValue = TrueRet->getReturnValue(); Value *FalseValue = FalseRet->getReturnValue(); - + // Unwrap any PHI nodes in the return blocks. if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue)) if (TVPN->getParent() == TrueSucc) @@ -1460,7 +1612,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue)) if (FVPN->getParent() == FalseSucc) FalseValue = FVPN->getIncomingValueForBlock(BI->getParent()); - + // In order for this transformation to be safe, we must be able to // unconditionally execute both operands to the return. This is // normally the case, but we could have a potentially-trapping @@ -1472,12 +1624,12 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue)) if (FCV->canTrap()) return false; - + // Okay, we collected all the mapped values and checked them for sanity, and // defined to really do this transformation. First, update the CFG. 
TrueSucc->removePredecessor(BI->getParent()); FalseSucc->removePredecessor(BI->getParent()); - + // Insert select instructions where needed. Value *BrCond = BI->getCondition(); if (TrueValue) { @@ -1491,15 +1643,15 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI, } } - Value *RI = !TrueValue ? + Value *RI = !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue); (void) RI; - + DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" << "\n " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc); - + EraseTerminatorInstAndDCECond(BI); return true; @@ -1600,7 +1752,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (Cond == 0) return false; } - + if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; @@ -1623,7 +1775,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { isSafeToSpeculativelyExecute(FrontIt)) { BonusInst = &*FrontIt; ++FrontIt; - + // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt; } @@ -1631,13 +1783,13 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; - + // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; - + if (&*CondIt != BI) return false; @@ -1649,7 +1801,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1))) if (CE->canTrap()) return false; - + // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; @@ -1659,22 +1811,22 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator()); - + // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. SmallVector<PHINode*, 4> PHIs; if (PBI == 0 || PBI->isUnconditional() || - (BI->isConditional() && + (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs))) continue; - + // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond = false; - + if (BI->isConditional()) { if (PBI->getSuccessor(0) == TrueDest) Opc = Instruction::Or; @@ -1693,7 +1845,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values - // must already have been resolved, so we won't be inhibiting the + // must already have been resolved, so we won't be inhibiting the // out-of-order core by speculating them earlier. if (BonusInst) { // Collect the values used by the bonus inst SmallVector<std::pair<Value*, unsigned>, 4> Worklist; Worklist.push_back(std::make_pair(PBI->getOperand(0), 0)); - + // Walk up to four levels back up the use-def chain of the predecessor's // terminator to see if all those values were used. 
The choice of four // levels is arbitrary, to provide a compile-time-cost bound. while (!Worklist.empty()) { std::pair<Value*, unsigned> Pair = Worklist.back(); Worklist.pop_back(); - + if (Pair.second >= 4) continue; UsedValues.erase(Pair.first); if (UsedValues.empty()) break; - + if (Instruction *I = dyn_cast<Instruction>(Pair.first)) { for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI) Worklist.push_back(std::make_pair(OI->get(), Pair.second+1)); - } + } } - + if (!UsedValues.empty()) return false; } DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); + IRBuilder<> Builder(PBI); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { Value *NewCond = PBI->getCondition(); - + if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) { CmpInst *CI = cast<CmpInst>(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { - NewCond = Builder.CreateNot(NewCond, + NewCond = Builder.CreateNot(NewCond, PBI->getCondition()->getName()+".not"); } - + PBI->setCondition(NewCond); PBI->swapSuccessors(); } - + // If we have a bonus inst, clone it into the predecessor block. Instruction *NewBonus = 0; if (BonusInst) { @@ -1756,7 +1908,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { NewBonus->takeName(BonusInst); BonusInst->setName(BonusInst->getName()+".old"); } - + // Clone Cond into the predecessor basic block, and or/and the // two conditions together. Instruction *New = Cond->clone(); @@ -1764,9 +1916,9 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { PredBlock->getInstList().insert(PBI, New); New->takeName(Cond); Cond->setName(New->getName()+".old"); - + if (BI->isConditional()) { - Instruction *NewCond = + Instruction *NewCond = cast<Instruction>(Builder.CreateBinOp(Opc, PBI->getCondition(), New, "or.cond")); PBI->setCondition(NewCond); @@ -1806,7 +1958,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C) // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond) // is false: PBI_Cond and BI_Value - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::And, PBI->getCondition(), New, "and.cond")); @@ -1814,7 +1966,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *NotCond = cast<Instruction>(Builder.CreateNot(PBI->getCondition(), "not.cond")); - MergedCond = + MergedCond = cast<Instruction>(Builder.CreateBinOp(Instruction::Or, NotCond, MergedCond, "or.cond")); @@ -1921,7 +2073,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (isa<DbgInfoIntrinsic>(*I)) I->clone()->insertBefore(PBI); - + return true; } return false; @@ -1936,7 +2088,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { BasicBlock *BB = BI->getParent(); // If this block ends with a branch instruction, and if there is a - // predecessor that ends on a branch of the same condition, make + // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { @@ -1945,11 +2097,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. 
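  // E.g. (illustrative) if the lone predecessor ends in
  //   br i1 %c, label %BB, label %other
  // then %c must be true whenever BB executes, so BB's branch on the same
  // %c can simply branch on the constant true.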
bool CondIsTrue = PBI->getSuccessor(0) == BB; - BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } - + // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. @@ -1969,18 +2121,18 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; - NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), + NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), P); } else { NewPN->addIncoming(BI->getCondition(), P); } } - + BI->setCondition(NewPN); return true; } } - + // If this is a conditional branch in an empty block, and if any // predecessor is a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -1991,11 +2143,11 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BI->getCondition())) if (CE->canTrap()) return false; - + int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2007,31 +2159,31 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PBIOp = BIOp = 1; else return false; - + // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will // keep getting unwound. if (PBI->getSuccessor(PBIOp) == BB) return false; - - // Do not perform this transformation if it would require + + // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); - + unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II); ++II, ++NumPhis) if (NumPhis > 2) // Disable this xform. return false; - + // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); - + DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); - - + + // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop @@ -2046,13 +2198,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); OtherDest = InfLoopBlock; - } - + } + DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. - + // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); IRBuilder<true, NoFolder> Builder(PBI); @@ -2065,16 +2217,16 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Merge the conditions. Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge"); - + // Modify PBI to branch on the new condition to the new dests. 
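  // After the normalization above, PBICond is true exactly when PBI used to
  // reach CommonDest directly, and BICond is true exactly when BB's branch
  // did, so the merged terminator is (illustrative SSA names)
  //   br i1 (or %pbi.cond, %bi.cond), label %CommonDest, label %OtherDest
  // which reaches CommonDest iff either original path did.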
PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); - + // OtherDest may have phi nodes. If so, add entries from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); - + // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make // them agree. @@ -2092,10 +2244,10 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { PN->setIncomingValue(PBBIdx, NV); } } - + DEBUG(dbgs() << "INTO: " << *PBI->getParent()); DEBUG(dbgs() << *PBI->getParent()->getParent()); - + // This basic block is probably dead. We know it has at least // one fewer predecessor. return true; @@ -2214,7 +2366,7 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] -/// +/// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, @@ -2228,17 +2380,17 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, Value *V = ICI->getOperand(0); ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1)); - + // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false; - + SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator()); if (SI->getCondition() != V) return false; - + // If BB is reachable on a non-default case, then we simply know the value of // V in this block. Substitute it and constant fold the icmp instruction // away. @@ -2246,7 +2398,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, ConstantInt *VVal = SI->findCaseDest(BB); assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); - + if (Value *V = SimplifyInstruction(ICI, TD)) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); @@ -2254,7 +2406,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. @@ -2264,13 +2416,13 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); - + ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. return SimplifyCFG(BB) | true; } - + // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); @@ -2297,7 +2449,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); SI->addCase(Cst, NewBB); - + // NewBB branches to the phi block; add the uncond branch and the phi entry. 
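  // E.g. (illustrative), after the split the new edge looks like
  //   switch.edge:
  //     br label %end
  //   end:
  //     ... = phi i1 [ true, %switch.edge ], ...
  // so the icmp's result is supplied as a constant phi entry.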
Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); @@ -2313,8 +2465,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast<Instruction>(BI->getCondition()); if (Cond == 0) return false; - - + + // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. @@ -2323,7 +2475,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, bool TrueWhenEqual = true; Value *ExtraCase = 0; unsigned UsedICmps = 0; - + if (Cond->getOpcode() == Instruction::Or) { CompVal = GatherConstantCompares(Cond, Values, ExtraCase, TD, true, UsedICmps); @@ -2332,7 +2484,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, UsedICmps); TrueWhenEqual = false; } - + // If we didn't have a multiply-compared value, fail. if (CompVal == 0) return false; @@ -2344,21 +2496,24 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, // instruction can't handle, remove them now. array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); - + // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just a cond branch. if (ExtraCase && Values.size() < 2) return false; - + + // TODO: Preserve branch weight metadata, similarly to how + // FoldValueComparisonIntoPredecessors preserves it. + // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); - + BasicBlock *BB = BI->getParent(); - + DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); - + // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. @@ -2372,13 +2527,13 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); - + OldTI->eraseFromParent(); - + // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); - + DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase << "\nEXTRABB = " << *BB); BB = NewBB; @@ -2392,14 +2547,14 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, TD->getIntPtrType(CompVal->getContext()), "magicptr"); } - + // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); // Add all of the 'cases' to the switch instruction. for (unsigned i = 0, e = Values.size(); i != e; ++i) New->addCase(Values[i], EdgeBB); - + // We added edges from BB to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. @@ -2410,10 +2565,10 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const TargetData *TD, for (unsigned i = 0, e = Values.size()-1; i != e; ++i) PN->addIncoming(InVal, BB); } - + // Erase the old branch instruction. 
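  // E.g. (illustrative) "br i1 (or (icmp eq i32 %x, 0), (icmp eq i32 %x, 1)),
  // label %T, label %F" has by now been rewritten as
  //   switch i32 %x, label %F [ i32 0, label %T
  //                             i32 1, label %T ]
  // leaving the original conditional branch dead.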
EraseTerminatorInstAndDCECond(BI); - + DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } @@ -2467,7 +2622,7 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; - + // Find predecessors that end with branches. SmallVector<BasicBlock*, 8> UncondBranchPreds; SmallVector<BranchInst*, 8> CondBranchPreds; @@ -2481,7 +2636,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { CondBranchPreds.push_back(BI); } } - + // If we found some, do the transformation! if (!UncondBranchPreds.empty() && DupRet) { while (!UncondBranchPreds.empty()) { @@ -2490,21 +2645,21 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { << "INTO UNCOND BRANCH PRED: " << *Pred); (void)FoldReturnIntoUncondBranch(RI, BB, Pred); } - + // If we eliminated all predecessors of the block, delete the block now. if (pred_begin(BB) == pred_end(BB)) // We know there are no successors, so just nuke the block. BB->eraseFromParent(); - + return true; } - + // Check out all of the conditional branches going to this return // instruction. If any of them just select between returns, change the // branch itself into a select/return pair. while (!CondBranchPreds.empty()) { BranchInst *BI = CondBranchPreds.pop_back_val(); - + // Check to see if the non-BB successor is also a return block. if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) && isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) && @@ -2516,9 +2671,9 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); - + bool Changed = false; - + // If there are any instructions immediately before the unreachable that can // be removed, do so. while (UI != BB->begin()) { @@ -2558,11 +2713,11 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BBI->eraseFromParent(); Changed = true; } - + // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. if (&BB->front() != UI) return Changed; - + SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { TerminatorInst *TI = Preds[i]->getTerminator(); @@ -2615,7 +2770,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { BasicBlock *MaxBlock = 0; for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second.first > MaxPop || + if (I->second.first > MaxPop || (I->second.first == MaxPop && MaxIndex > I->second.second)) { MaxPop = I->second.first; MaxIndex = I->second.second; @@ -2627,13 +2782,13 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // edges to it. SI->setDefaultDest(MaxBlock); Changed = true; - + // If MaxBlock has phinodes in it, remove MaxPop-1 entries from // it. if (isa<PHINode>(MaxBlock->begin())) for (unsigned i = 0; i != MaxPop-1; ++i) MaxBlock->removePredecessor(SI->getParent()); - + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i) if (i.getCaseSuccessor() == MaxBlock) { @@ -2648,7 +2803,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // place to note that the call does not throw though. 
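      // E.g. (illustrative): "invoke void @f() to label %ok unwind label %lp",
      // where %lp only leads to unreachable, becomes "call void @f()"
      // followed by "br label %ok".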
BranchInst *BI = Builder.CreateBr(II->getNormalDest()); II->removeFromParent(); // Take out of symbol table - + // Insert the call now... SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3); Builder.SetInsertPoint(BI); @@ -2663,7 +2818,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { } } } - + // If this block is now dead, remove it. if (pred_begin(BB) == pred_end(BB) && BB != &BB->getParent()->getEntryBlock()) { @@ -2823,6 +2978,285 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { return Changed; } +/// ValidLookupTableConstant - Return true if the backend will be able to handle +/// initializing an array of constants like C. +static bool ValidLookupTableConstant(Constant *C) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) + return CE->isGEPWithNoNotionalOverIndexing(); + + return isa<ConstantFP>(C) || + isa<ConstantInt>(C) || + isa<ConstantPointerNull>(C) || + isa<GlobalValue>(C) || + isa<UndefValue>(C); +} + +/// GetCaseResults - Try to determine the resulting constant values in phi +/// nodes at the common destination basic block for one of the case +/// destinations of a switch instruction. +static bool GetCaseResults(SwitchInst *SI, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVector<std::pair<PHINode*, Constant*>, 4> &Res) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty, continue to its successor. + if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() && + !isa<PHINode>(CaseDest->begin())) { + + TerminatorInst *Terminator = CaseDest->getTerminator(); + if (Terminator->getNumSuccessors() != 1) + return false; + + Pred = CaseDest; + CaseDest = Terminator->getSuccessor(0); + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast<PHINode>(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx)); + if (!ConstVal) + return false; + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return true; +} + +/// BuildLookupTable - Build a lookup table with the contents of Results, using +/// DefaultResult to fill the holes in the table. If the table ends up +/// containing the same result in each element, set *SingleResult to that value +/// and return NULL. +static GlobalVariable *BuildLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, + Constant *DefaultResult, + Constant **SingleResult) { + assert(Results.size() && "Need values to build lookup table"); + assert(TableSize >= Results.size() && "Table needs to hold all values"); + + // If all values in the table are equal, this is that value. + Constant *SameResult = Results.begin()->second; + + // Build up the table contents. 
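+  // For example (hypothetical case list), results {0 -> 1, 2 -> 9} with an
+  // Offset of 0, a TableSize of 3 and a DefaultResult of 5 yield the
+  // initializer [ 1, 5, 9 ].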
+ std::vector<Constant*> TableContents(TableSize); + for (size_t I = 0, E = Results.size(); I != E; ++I) { + ConstantInt *CaseVal = Results[I].first; + Constant *CaseRes = Results[I].second; + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SameResult) + SameResult = NULL; + } + + // Fill in any holes in the table with the default result. + if (Results.size() < TableSize) { + for (unsigned i = 0; i < TableSize; ++i) { + if (!TableContents[i]) + TableContents[i] = DefaultResult; + } + + if (DefaultResult != SameResult) + SameResult = NULL; + } + + // Same result was used in the entire table; just return that. + if (SameResult) { + *SingleResult = SameResult; + return NULL; + } + + ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + GV->setUnnamedAddr(true); + return GV; +} + +/// SwitchToLookupTable - If the switch is only used to initialize one or more +/// phi nodes in a common successor block with different constant values, +/// replace the switch with lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, + IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + // FIXME: Handle unreachable cases. + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: If the results are all integers and the lookup table would fit in a + // target-legal register, we should store them as a bitmap and use shift/mask + // to look up the result. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore the switch if the number of cases is too small. + // This is similar to the check when building jump tables in + // SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Determine the best cut-off. + if (SI->getNumCases() < 4) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = NULL; + typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy; + SmallDenseMap<PHINode*, ResultListTy> ResultLists; + SmallDenseMap<PHINode*, Constant*> DefaultResults; + SmallDenseMap<PHINode*, Type*> ResultTypes; + SmallVector<PHINode*, 4> PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results)) + return false; + + // Append the result from this case to the list for each phi. 
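+    // E.g. (hypothetical) for "case 1: a = 10; b = 20;" this records the
+    // pair (1, 10) on the list for a's phi and (1, 20) on the list for b's
+    // phi, so each phi later gets a table of its own.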
+ for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) { + if (!ResultLists.count(I->first)) + PHIs.push_back(I->first); + ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second)); + } + } + + // Get the resulting values for the default case. + SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList; + if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) + return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); + } + + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + // The table density should be at least 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + // Be careful to avoid overflow in the density computation. + if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) + return false; + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + if (SI->getNumCases() * 10 < TableSize * 4) + return false; + + // Build the lookup tables. + SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; + SmallDenseMap<PHINode*, Constant*> SingleResults; + + Module &Mod = *CommonDest->getParent()->getParent(); + for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; + + Constant *SingleResult = NULL; + LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, + ResultLists[PHI], DefaultResults[PHI], + &SingleResult); + SingleResults[PHI] = SingleResult; + } + + // Create the BB that does the lookups. + BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Check whether the condition value is within the case range, and branch to + // the new BB. + Builder.SetInsertPoint(SI); + Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Populate the BB that does the lookups. + Builder.SetInsertPoint(LookupBB); + bool ReturnedEarly = false; + for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; + // There was a single result for this phi; just use that. + if (Constant *SingleResult = SingleResults[PHI]) { + PHI->addIncoming(SingleResult, LookupBB); + continue; + } + + Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; + Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, + "switch.gep"); + Value *Result = Builder.CreateLoad(GEP, "switch.load"); + + // If the result is only going to be used to return from the function, + // we want to do that right here. + if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) { + if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + } + } + + if (!ReturnedEarly) + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch. 
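+  // At this point the lookup has been materialized; e.g. (illustrative):
+  //   %switch.tableidx = sub i32 %x, <min case>
+  //   %0 = icmp ult i32 %switch.tableidx, <table size>
+  //   br i1 %0, label %switch.lookup, label %default
+  // with %switch.lookup loading from switch.table and feeding the phis.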
+ for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == SI->getDefaultDest()) continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + return true; +} + bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // If this switch is too complex to want to look at, ignore it. if (!isValueEqualityComparison(SI)) @@ -2862,13 +3296,16 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; + if (SwitchToLookupTable(SI, Builder)) + return SimplifyCFG(BB) | true; + return false; } bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; - + // Eliminate redundant destinations. SmallPtrSet<Value *, 8> Succs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { @@ -2879,7 +3316,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { --i; --e; Changed = true; } - } + } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. @@ -2887,14 +3324,14 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { EraseTerminatorInstAndDCECond(IBI); return true; } - + if (IBI->getNumDestinations() == 1) { // If the indirectbr has one successor, change it to a direct branch. BranchInst::Create(IBI->getDestination(0), IBI); EraseTerminatorInstAndDCECond(IBI); return true; } - + if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) return SimplifyCFG(BB) | true; @@ -2904,13 +3341,13 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ BasicBlock *BB = BI->getParent(); - + // If the Terminator is the only non-phi instruction, simplify the block. BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; - + // If the only instruction in the block is a seteq/setne comparison // against a constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) @@ -2921,7 +3358,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder)) return true; } - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value @@ -2934,7 +3371,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); - + // Conditional branch if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, @@ -2943,7 +3380,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) return SimplifyCFG(BB) | true; - + // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. 
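    // E.g. (illustrative) a foldable block consists of nothing but
    //   %cmp = icmp eq i32 %x, 4
    //   br i1 %cmp, label %a, label %b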
BasicBlock::iterator I = BB->begin(); @@ -2962,17 +3399,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return SimplifyCFG(BB) | true; } } - + // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. if (SimplifyBranchOnICmpChain(BI, TD, Builder)) return true; - + // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. if (FoldBranchToCommonDest(BI)) return SimplifyCFG(BB) | true; - + // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we @@ -2999,14 +3436,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) return SimplifyCFG(BB) | true; } - + // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, TD)) return SimplifyCFG(BB) | true; - + // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) @@ -3114,7 +3551,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // if (MergeBlockIntoPredecessor(BB)) return true; - + IRBuilder<> Builder(BB); // If there is a trivial two-entry PHI node in this basic block, and we can diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 81eb9e0..528e6a1 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -72,7 +72,7 @@ namespace { ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I); + Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); } // Place the list of instructions to simplify on the next loop iteration diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 62d23cb..c09dcd2 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -601,7 +601,7 @@ namespace { // It is important to cleanup here so that future iterations of this // function have less work to do. - (void) SimplifyInstructionsInBlock(&BB, TD); + (void) SimplifyInstructionsInBlock(&BB, TD, AA->getTargetLibraryInfo()); return true; }
