author     Stephen Hines <srhines@google.com>    2014-02-11 20:01:10 -0800
committer  Stephen Hines <srhines@google.com>    2014-02-11 20:01:10 -0800
commit     ce9904c6ea8fd669978a8eefb854b330eb9828ff (patch)
tree       2418ee2e96ea220977c8fb74959192036ab5b133 /lib/Analysis
parent     c27b10b198c1d9e9b51f2303994313ec2778edd7 (diff)
parent     dbb832b83351cec97b025b61c26536ef50c3181c (diff)
Merge remote-tracking branch 'upstream/release_34' into merge-20140211
Conflicts:
	lib/Linker/LinkModules.cpp
	lib/Support/Unix/Signals.inc

Change-Id: Ia54f291fa5dc828052d2412736e8495c1282aa64
Diffstat (limited to 'lib/Analysis')
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp                 2
-rw-r--r--  lib/Analysis/Analysis.cpp                       11
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp             99
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp            113
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp          22
-rw-r--r--  lib/Analysis/CFG.cpp                           128
-rw-r--r--  lib/Analysis/CMakeLists.txt                     11
-rw-r--r--  lib/Analysis/CaptureTracking.cpp                 6
-rw-r--r--  lib/Analysis/ConstantFolding.cpp               228
-rw-r--r--  lib/Analysis/CostModel.cpp                     271
-rw-r--r--  lib/Analysis/Delinearization.cpp               133
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp             76
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp                 230
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp            2
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp               2
-rw-r--r--  lib/Analysis/IPA/IPA.cpp                         3
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp                147
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp            20
-rw-r--r--  lib/Analysis/Lint.cpp                           42
-rw-r--r--  lib/Analysis/LoopInfo.cpp                       34
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp                 97
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp       11
-rw-r--r--  lib/Analysis/PathNumbering.cpp                 521
-rw-r--r--  lib/Analysis/PathProfileInfo.cpp               433
-rw-r--r--  lib/Analysis/PathProfileVerifier.cpp           206
-rw-r--r--  lib/Analysis/ProfileDataLoader.cpp             155
-rw-r--r--  lib/Analysis/ProfileDataLoaderPass.cpp         188
-rw-r--r--  lib/Analysis/ProfileEstimatorPass.cpp          426
-rw-r--r--  lib/Analysis/ProfileInfo.cpp                  1079
-rw-r--r--  lib/Analysis/ProfileInfoLoader.cpp             155
-rw-r--r--  lib/Analysis/ProfileInfoLoaderPass.cpp         267
-rw-r--r--  lib/Analysis/ProfileVerifierPass.cpp           383
-rw-r--r--  lib/Analysis/RegionInfo.cpp                      6
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp              1051
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp       110
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp   18
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp            46
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp        116
-rw-r--r--  lib/Analysis/ValueTracking.cpp                  43
39 files changed, 2243 insertions, 4648 deletions
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 5910526..2289c12 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -299,7 +299,6 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
bool AliasSetTracker::add(LoadInst *LI) {
if (LI->getOrdering() > Monotonic) return addUnknown(LI);
AliasSet::AccessType ATy = AliasSet::Refs;
- if (!LI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
AliasSet &AS = addPointer(LI->getOperand(0),
AA.getTypeStoreSize(LI->getType()),
@@ -312,7 +311,6 @@ bool AliasSetTracker::add(LoadInst *LI) {
bool AliasSetTracker::add(StoreInst *SI) {
if (SI->getOrdering() > Monotonic) return addUnknown(SI);
AliasSet::AccessType ATy = AliasSet::Mods;
- if (!SI->isUnordered()) ATy = AliasSet::ModRef;
bool NewPtr;
Value *Val = SI->getOperand(0);
AliasSet &AS = addPointer(SI->getOperand(1),
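
Illustrative note, not part of the commit: after the early `getOrdering() > Monotonic` bail-out in the two hunks above, the only accesses reaching the deleted `isUnordered()` checks are non-atomic, unordered, monotonic, or volatile ones, so the removed upgrade to ModRef only ever fired for monotonic and volatile accesses. A minimal sketch of the surviving guard, assuming LLVM 3.4-era headers (the helper name is invented):

    // Accesses stronger than Monotonic carry synchronization semantics that a
    // per-pointer alias-set record cannot represent; AliasSetTracker records
    // them as opaque "unknown" instructions instead.
    static bool trackedAsPointerAccess(const LoadInst *LI) {
      return LI->getOrdering() <= Monotonic;
    }
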
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 349c417..98f2a55 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCFGOnlyViewerPass(Registry);
initializeCFGOnlyPrinterPass(Registry);
initializeDependenceAnalysisPass(Registry);
+ initializeDelinearizationPass(Registry);
initializeDominanceFrontierPass(Registry);
initializeDomViewerPass(Registry);
initializeDomPrinterPass(Registry);
@@ -54,16 +55,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeMemoryDependenceAnalysisPass(Registry);
initializeModuleDebugInfoPrinterPass(Registry);
initializePostDominatorTreePass(Registry);
- initializeProfileEstimatorPassPass(Registry);
- initializeNoProfileInfoPass(Registry);
- initializeNoPathProfileInfoPass(Registry);
- initializeProfileInfoAnalysisGroup(Registry);
- initializePathProfileInfoAnalysisGroup(Registry);
- initializeLoaderPassPass(Registry);
- initializePathProfileLoaderPassPass(Registry);
- initializeProfileVerifierPassPass(Registry);
- initializePathProfileVerifierPass(Registry);
- initializeProfileMetadataLoaderPassPass(Registry);
initializeRegionInfoPass(Registry);
initializeRegionViewerPass(Registry);
initializeRegionPrinterPass(Registry);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 9fe1362..b2c2011 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -122,7 +122,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// question (in this case rewind to p), or
// - just give up. It is up to caller to make sure the pointer is pointing
// to the base address the object.
- //
+ //
// We go for 2nd option for simplicity.
if (!isIdentifiedObject(V))
return false;
@@ -130,7 +130,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
// This function needs to use the aligned object size because we allow
// reads a bit past the end given sufficient alignment.
uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
-
+
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
}
@@ -163,7 +163,7 @@ namespace {
EK_SignExt,
EK_ZeroExt
};
-
+
struct VariableGEPIndex {
const Value *V;
ExtensionKind Extension;
@@ -200,7 +200,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
Offset = 0;
return V;
}
-
+
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
switch (BOp->getOpcode()) {
@@ -231,7 +231,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
}
}
}
-
+
// Since GEP indices are sign extended anyway, we don't care about the high
// bits of a sign or zero extended value - just scales and offsets. The
// extensions have to be consistent though.
@@ -248,10 +248,10 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
TD, Depth+1);
Scale = Scale.zext(OldWidth);
Offset = Offset.zext(OldWidth);
-
+
return Result;
}
-
+
Scale = 1;
Offset = 0;
return V;
@@ -276,7 +276,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
const DataLayout *TD) {
// Limit recursion depth to limit compile time in crazy cases.
unsigned MaxLookup = 6;
-
+
BaseOffs = 0;
do {
// See if this is a bitcast or GEP.
@@ -291,7 +291,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
}
return V;
}
-
+
if (Op->getOpcode() == Instruction::BitCast) {
V = Op->getOperand(0);
continue;
@@ -308,15 +308,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = Simplified;
continue;
}
-
+
return V;
}
-
+
// Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
- ->getElementType()->isSized())
+ if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
return V;
-
+
// If we are lacking DataLayout information, we can't compute the offets of
// elements computed by GEPs. However, we can handle bitcast equivalent
// GEPs.
@@ -326,7 +325,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
V = GEPOp->getOperand(0);
continue;
}
-
+
+ unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -337,38 +337,37 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0) continue;
-
+
BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
continue;
}
-
+
// For an array/pointer, add the element offset, explicitly scaled.
if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
if (CIdx->isZero()) continue;
BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
continue;
}
-
+
uint64_t Scale = TD->getTypeAllocSize(*GTI);
ExtensionKind Extension = EK_NotExtended;
-
+
// If the integer type is smaller than the pointer size, it is implicitly
// sign extended to pointer size.
- unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
- if (TD->getPointerSizeInBits() > Width)
+ unsigned Width = Index->getType()->getIntegerBitWidth();
+ if (TD->getPointerSizeInBits(AS) > Width)
Extension = EK_SignExt;
-
+
// Use GetLinearExpression to decompose the index into a C1*V+C2 form.
APInt IndexScale(Width, 0), IndexOffset(Width, 0);
Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
*TD, 0);
-
+
// The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
// This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
BaseOffs += IndexOffset.getSExtValue()*Scale;
Scale *= IndexScale.getSExtValue();
-
-
+
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
// A[x][x] -> x*16 + x*4 -> x*20
@@ -381,25 +380,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
break;
}
}
-
+
// Make sure that we have a scale that makes sense for this target's
// pointer size.
- if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) {
Scale <<= ShiftBits;
Scale = (int64_t)Scale >> ShiftBits;
}
-
+
if (Scale) {
VariableGEPIndex Entry = {Index, Extension,
static_cast<int64_t>(Scale)};
VarIndices.push_back(Entry);
}
}
-
+
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
} while (--MaxLookup);
-
+
// If the chain of expressions is too deep, just return early.
return V;
}
@@ -407,7 +406,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
/// GetIndexDifference - Dest and Src are the variable indices from two
/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
+/// difference between the two pointers.
static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const SmallVectorImpl<VariableGEPIndex> &Src) {
if (Src.empty()) return;
@@ -416,12 +415,12 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
const Value *V = Src[i].V;
ExtensionKind Extension = Src[i].Extension;
int64_t Scale = Src[i].Scale;
-
+
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
+
// If we found it, subtract off Scale V's from the entry in Dest. If it
// goes to zero, remove the entry.
if (Dest[j].Scale != Scale)
@@ -431,7 +430,7 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
Scale = 0;
break;
}
-
+
// If we didn't consume this entry, add it to the end of the Dest list.
if (Scale) {
VariableGEPIndex Entry = { V, Extension, -Scale };
@@ -526,7 +525,7 @@ namespace {
return (AliasAnalysis*)this;
return this;
}
-
+
private:
// AliasCache - Track alias queries to guard against recursion.
typedef std::pair<Location, Location> LocPair;
@@ -696,7 +695,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
"AliasAnalysis query involving multiple functions!");
const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
-
+
// If this is a tail call and Loc.Ptr points to a stack location, we know that
// the tail call cannot access or modify the local stack.
// We cannot exclude byval arguments here; these belong to the caller of
@@ -706,7 +705,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
return NoModRef;
-
+
// If the pointer is to a locally allocated object that does not escape,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
@@ -722,7 +721,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (!(*CI)->getType()->isPointerTy() ||
(!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
continue;
-
+
// If this is a no-capture pointer argument, see if we can tell that it
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
@@ -732,7 +731,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
break;
}
}
-
+
if (!PassedAsArg)
return NoModRef;
}
@@ -821,7 +820,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
}
// We can bound the aliasing properties of memset_pattern16 just as we can
- // for memcpy/memset. This is particularly important because the
+ // for memcpy/memset. This is particularly important because the
// LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
// whenever possible.
else if (TLI.has(LibFunc::memset_pattern16) &&
@@ -925,22 +924,22 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
GEP1VariableIndices.clear();
}
}
-
+
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
if (BaseAlias != MustAlias) return BaseAlias;
-
+
// Otherwise, we have a MustAlias. Since the base pointers alias each other
// exactly, see if the computed offset from the common pointer tells us
// about the relation of the resulting pointer.
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
int64_t GEP2BaseOffset;
SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
-
+
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -948,12 +947,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
-
+
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
GEP1BaseOffset -= GEP2BaseOffset;
GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
-
+
} else {
// Check to see if these two pointers are related by the getelementptr
// instruction. If one pointer is a GEP with a non-zero index of the other
@@ -975,7 +974,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
const Value *GEP1BasePtr =
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1) {
@@ -984,7 +983,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return MayAlias;
}
}
-
+
// In the two GEP Case, if there is no difference in the offsets of the
// computed pointers, the resultant pointers are a must alias. This
// hapens when we have two lexically identical GEP's (for example).
@@ -1226,7 +1225,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
(isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
return NoAlias;
-
+
// If one pointer is the result of a call/invoke or load and the other is a
// non-escaping local object within the same function, then we know the
// object couldn't escape to a point where the call could return it.
@@ -1248,7 +1247,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
(V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
return NoAlias;
-
+
// Check the cache before climbing up use-def chains. This also terminates
// otherwise infinitely recursive queries.
LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
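
A worked example, illustrative and not from the commit, of the decomposition DecomposeGEPExpression performs for the `A[x][x] -> x*16 + x*4 -> x*20` case mentioned in its comments:

    ; On a target with 64-bit pointers, decomposing
    ;   %p = getelementptr [10 x [4 x i32]]* %A, i64 0, i64 %x, i64 %x
    ; yields BaseOffs = 0 and a single merged variable index
    ;   { V = %x, Extension = EK_NotExtended, Scale = 16 + 4 = 20 }
    ; i.e. %p addresses %A plus 20*%x bytes, which lets aliasGEP reason about
    ; the symbolic difference of two such pointers.
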
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 8469556..62f3ab1 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======//
+//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,14 +17,97 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
using namespace llvm;
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
- true, true)
+#ifndef NDEBUG
+enum GVDAGType {
+ GVDT_None,
+ GVDT_Fraction,
+ GVDT_Integer
+};
+
+static cl::opt<GVDAGType>
+ViewBlockFreqPropagationDAG("view-block-freq-propagation-dags", cl::Hidden,
+ cl::desc("Pop up a window to show a dag displaying how block "
+ "frequencies propagation through the CFG."),
+ cl::values(
+ clEnumValN(GVDT_None, "none",
+ "do not display graphs."),
+ clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
+ "fractional block frequency representation."),
+ clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
+ "integer fractional block frequency representation."),
+ clEnumValEnd));
+
+namespace llvm {
+
+template <>
+struct GraphTraits<BlockFrequencyInfo *> {
+ typedef const BasicBlock NodeType;
+ typedef succ_const_iterator ChildIteratorType;
+ typedef Function::const_iterator nodes_iterator;
+
+ static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static ChildIteratorType child_begin(const NodeType *N) {
+ return succ_begin(N);
+ }
+ static ChildIteratorType child_end(const NodeType *N) {
+ return succ_end(N);
+ }
+ static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
+ return G->getFunction()->begin();
+ }
+ static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
+ return G->getFunction()->end();
+ }
+};
+
+template<>
+struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
+
+ static std::string getGraphName(const BlockFrequencyInfo *G) {
+ return G->getFunction()->getName();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node,
+ const BlockFrequencyInfo *Graph) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << Node->getName().str() << ":";
+ switch (ViewBlockFreqPropagationDAG) {
+ case GVDT_Fraction:
+ Graph->getBlockFreq(Node).print(OS);
+ break;
+ case GVDT_Integer:
+ OS << Graph->getBlockFreq(Node).getFrequency();
+ break;
+ case GVDT_None:
+ llvm_unreachable("If we are not supposed to render a graph we should "
+ "never reach this point.");
+ }
+
+ return Result;
+ }
+};
+
+} // end namespace llvm
+#endif
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
+ "Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
- true, true)
+INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
+ "Block Frequency Analysis", true, true)
char BlockFrequencyInfo::ID = 0;
@@ -46,6 +129,10 @@ void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
bool BlockFrequencyInfo::runOnFunction(Function &F) {
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
BFI->doFunction(&F, &BPI);
+#ifndef NDEBUG
+ if (ViewBlockFreqPropagationDAG != GVDT_None)
+ view();
+#endif
return false;
}
@@ -56,3 +143,19 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
return BFI->getBlockFreq(BB);
}
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void BlockFrequencyInfo::view() const {
+// This code is only for debugging.
+#ifndef NDEBUG
+ ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+#else
+ errs() << "BlockFrequencyInfo::view is only available in debug builds on "
+ "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+const Function *BlockFrequencyInfo::getFunction() const {
+ return BFI->Fn;
+}
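
A hypothetical invocation, assuming an assertions-enabled build of opt (the input file name is invented), of the viewer option this file adds; the option is only compiled in under `#ifndef NDEBUG`:

    $ opt -analyze -block-freq -view-block-freq-propagation-dags=integer input.ll
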
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 7cdf828..86560ca 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -398,10 +398,24 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
// InstCombine canonicalizes X <= 0 into X < 1.
// X <= 0 -> Unlikely
isProb = false;
- } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) {
- // InstCombine canonicalizes X >= 0 into X > -1.
- // X >= 0 -> Likely
- isProb = true;
+ } else if (CV->isAllOnesValue()) {
+ switch (CI->getPredicate()) {
+ case CmpInst::ICMP_EQ:
+ // X == -1 -> Unlikely
+ isProb = false;
+ break;
+ case CmpInst::ICMP_NE:
+ // X != -1 -> Likely
+ isProb = true;
+ break;
+ case CmpInst::ICMP_SGT:
+ // InstCombine canonicalizes X >= 0 into X > -1.
+ // X >= 0 -> Likely
+ isProb = true;
+ break;
+ default:
+ return false;
+ }
} else {
return false;
}
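
Illustrative IR, not from the commit, showing how the widened heuristic above classifies comparisons against all-ones:

    %a = icmp eq i32 %x, -1   ; X == -1 -> Unlikely
    %b = icmp ne i32 %x, -1   ; X != -1 -> Likely
    %c = icmp sgt i32 %x, -1  ; InstCombine's form of X >= 0 -> Likely
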
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index a5ed21a..c3f32d3 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -116,7 +116,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
// LoopInfo contains a mapping from basic block to the innermost loop. Find
// the outermost loop in the loop nest that contains BB.
-static const Loop *getOutermostLoop(LoopInfo *LI, const BasicBlock *BB) {
+static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
const Loop *L = LI->getLoopFor(BB);
if (L) {
while (const Loop *Parent = L->getParentLoop())
@@ -126,60 +126,17 @@ static const Loop *getOutermostLoop(LoopInfo *LI, const BasicBlock *BB) {
}
// True if there is a loop which contains both BB1 and BB2.
-static bool loopContainsBoth(LoopInfo *LI,
+static bool loopContainsBoth(const LoopInfo *LI,
const BasicBlock *BB1, const BasicBlock *BB2) {
const Loop *L1 = getOutermostLoop(LI, BB1);
const Loop *L2 = getOutermostLoop(LI, BB2);
return L1 != NULL && L1 == L2;
}
-static bool isPotentiallyReachableSameBlock(const Instruction *A,
- const Instruction *B,
- LoopInfo *LI) {
- // The same block case is special because it's the only time we're looking
- // within a single block to see which comes first. Once we start looking at
- // multiple blocks, the first instruction of the block is reachable, so we
- // only need to determine reachability between whole blocks.
-
- const BasicBlock *BB = A->getParent();
- // If the block is in a loop then we can reach any instruction in the block
- // from any other instruction in the block by going around the backedge.
- // Check whether we're in a loop (or aren't sure).
-
- // Can't be in a loop if it's the entry block -- the entry block may not
- // have predecessors.
- bool HasLoop = BB != &BB->getParent()->getEntryBlock();
-
- // Can't be in a loop if LoopInfo doesn't know about it.
- if (LI && HasLoop) {
- HasLoop = LI->getLoopFor(BB) != 0;
- }
- if (HasLoop)
- return true;
-
- // Linear scan, start at 'A', see whether we hit 'B' or the end first.
- for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
- if (&*I == B)
- return true;
- }
- return false;
-}
-
-bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
- DominatorTree *DT, LoopInfo *LI) {
- assert(A->getParent()->getParent() == B->getParent()->getParent() &&
- "This analysis is function-local!");
-
- const BasicBlock *StopBB = B->getParent();
-
- if (A->getParent() == B->getParent())
- return isPotentiallyReachableSameBlock(A, B, LI);
-
- if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return true;
- if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
- return false;
-
+static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
+ BasicBlock *StopBB,
+ const DominatorTree *DT,
+ const LoopInfo *LI) {
// When the stop block is unreachable, it's dominated from everywhere,
// regardless of whether there's a path between the two blocks.
if (DT && !DT->isReachableFromEntry(StopBB))
@@ -188,11 +145,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
// Limit the number of blocks we visit. The goal is to avoid run-away compile
// times on large CFGs without hampering sensible code. Arbitrarily chosen.
unsigned Limit = 32;
-
SmallSet<const BasicBlock*, 64> Visited;
- SmallVector<BasicBlock*, 32> Worklist;
- Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
-
do {
BasicBlock *BB = Worklist.pop_back_val();
if (!Visited.insert(BB))
@@ -221,7 +174,72 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
}
} while (!Worklist.empty());
- // We have exhaustived all possible paths and are certain that 'To' can not
- // be reached from 'From'.
+ // We have exhausted all possible paths and are certain that 'To' can not be
+ // reached from 'From'.
return false;
}
+
+bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent() == B->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+ Worklist.push_back(const_cast<BasicBlock*>(A));
+
+ return isPotentiallyReachableInner(Worklist, const_cast<BasicBlock*>(B),
+ DT, LI);
+}
+
+bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
+ const DominatorTree *DT, const LoopInfo *LI) {
+ assert(A->getParent()->getParent() == B->getParent()->getParent() &&
+ "This analysis is function-local!");
+
+ SmallVector<BasicBlock*, 32> Worklist;
+
+ if (A->getParent() == B->getParent()) {
+ // The same block case is special because it's the only time we're looking
+ // within a single block to see which instruction comes first. Once we
+ // start looking at multiple blocks, the first instruction of the block is
+ // reachable, so we only need to determine reachability between whole
+ // blocks.
+ BasicBlock *BB = const_cast<BasicBlock *>(A->getParent());
+
+ // If the block is in a loop then we can reach any instruction in the block
+ // from any other instruction in the block by going around a backedge.
+ if (LI && LI->getLoopFor(BB) != 0)
+ return true;
+
+ // Linear scan, start at 'A', see whether we hit 'B' or the end first.
+ for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
+ if (&*I == B)
+ return true;
+ }
+
+ // Can't be in a loop if it's the entry block -- the entry block may not
+ // have predecessors.
+ if (BB == &BB->getParent()->getEntryBlock())
+ return false;
+
+ // Otherwise, continue doing the normal per-BB CFG walk.
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+ Worklist.push_back(*I);
+
+ if (Worklist.empty()) {
+ // We've proven that there's no path!
+ return false;
+ }
+ } else {
+ Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
+ }
+
+ if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return true;
+ if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
+ return false;
+
+ return isPotentiallyReachableInner(Worklist,
+ const_cast<BasicBlock*>(B->getParent()),
+ DT, LI);
+}
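
A minimal caller sketch, with invented variable names, of the two overloads that exist after this refactoring; both accept null DominatorTree and LoopInfo pointers:

    // Whole-CFG query: can control flow possibly get from one block to another?
    bool BlockReach = isPotentiallyReachable(FromBB, ToBB, &DT, &LI);
    // Instruction form: performs the same-block linear scan before falling
    // back to the shared worklist walk in isPotentiallyReachableInner.
    bool InstReach = isPotentiallyReachable(FromInst, ToInst, &DT, &LI);
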
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 94ded34..3624aac 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMAnalysis
CostModel.cpp
CodeMetrics.cpp
ConstantFolding.cpp
+ Delinearization.cpp
DependenceAnalysis.cpp
DomPrinter.cpp
DominanceFrontier.cpp
@@ -35,17 +36,7 @@ add_llvm_library(LLVMAnalysis
ModuleDebugInfoPrinter.cpp
NoAliasAnalysis.cpp
PHITransAddr.cpp
- PathNumbering.cpp
- PathProfileInfo.cpp
- PathProfileVerifier.cpp
PostDominators.cpp
- ProfileEstimatorPass.cpp
- ProfileInfo.cpp
- ProfileInfoLoader.cpp
- ProfileInfoLoaderPass.cpp
- ProfileVerifierPass.cpp
- ProfileDataLoader.cpp
- ProfileDataLoaderPass.cpp
PtrUseVisitor.cpp
RegionInfo.cpp
RegionPass.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 9eb76a8..79fab1b 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -146,8 +146,14 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
case Instruction::PHI:
case Instruction::Select:
// The original value is not captured via this if the new value isn't.
+ Count = 0;
for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return Tracker->tooManyUses();
+
Use *U = &UI.getUse();
if (Visited.insert(U))
if (Tracker->shouldExplore(U))
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index bc0dffc..3d32232 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -224,7 +224,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt &Offset, const DataLayout &TD) {
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
- Offset.clearAllBits();
+ unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType());
+ Offset = APInt(BitWidth, 0);
return true;
}
@@ -238,16 +239,23 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
- // If the base isn't a global+constant, we aren't either.
- if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD))
- return false;
+ GEPOperator *GEP = dyn_cast<GEPOperator>(CE);
+ if (!GEP)
+ return false;
- // Otherwise, add any offset that our operands provide.
- return GEP->accumulateConstantOffset(TD, Offset);
- }
+ unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType());
+ APInt TmpOffset(BitWidth, 0);
- return false;
+ // If the base isn't a global+constant, we aren't either.
+ if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD))
+ return false;
+
+ // Otherwise, add any offset that our operands provide.
+ if (!GEP->accumulateConstantOffset(TD, TmpOffset))
+ return false;
+
+ Offset = TmpOffset;
+ return true;
}
/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the
@@ -324,12 +332,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
// If we read all of the bytes we needed from this element we're done.
uint64_t NextEltOffset = SL->getElementOffset(Index);
- if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset)
+ if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
return true;
// Move to the next element of the struct.
- CurPtr += NextEltOffset-CurEltOffset-ByteOffset;
- BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset;
+ CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
+ BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
ByteOffset = 0;
CurEltOffset = NextEltOffset;
}
@@ -338,7 +346,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
- Type *EltTy = cast<SequentialType>(C->getType())->getElementType();
+ Type *EltTy = C->getType()->getSequentialElementType();
uint64_t EltSize = TD.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
@@ -346,7 +354,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ArrayType *AT = dyn_cast<ArrayType>(C->getType()))
NumElts = AT->getNumElements();
else
- NumElts = cast<VectorType>(C->getType())->getNumElements();
+ NumElts = C->getType()->getVectorNumElements();
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
@@ -367,9 +375,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
- CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
+ CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) {
return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
BytesLeft, TD);
+ }
}
// Otherwise, unknown initializer type.
@@ -378,26 +387,29 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
const DataLayout &TD) {
- Type *LoadTy = cast<PointerType>(C->getType())->getElementType();
+ PointerType *PTy = cast<PointerType>(C->getType());
+ Type *LoadTy = PTy->getElementType();
IntegerType *IntType = dyn_cast<IntegerType>(LoadTy);
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
+ unsigned AS = PTy->getAddressSpace();
+
// If this is a float/double load, we can try folding it as an int32/64 load
// and then bitcast the result. This can be useful for union cases. Note
// that address spaces don't matter here since we're not going to result in
// an actual new load.
Type *MapTy;
if (LoadTy->isHalfTy())
- MapTy = Type::getInt16PtrTy(C->getContext());
+ MapTy = Type::getInt16PtrTy(C->getContext(), AS);
else if (LoadTy->isFloatTy())
- MapTy = Type::getInt32PtrTy(C->getContext());
+ MapTy = Type::getInt32PtrTy(C->getContext(), AS);
else if (LoadTy->isDoubleTy())
- MapTy = Type::getInt64PtrTy(C->getContext());
+ MapTy = Type::getInt64PtrTy(C->getContext(), AS);
else if (LoadTy->isVectorTy()) {
- MapTy = IntegerType::get(C->getContext(),
- TD.getTypeAllocSizeInBits(LoadTy));
- MapTy = PointerType::getUnqual(MapTy);
+ MapTy = PointerType::getIntNPtrTy(C->getContext(),
+ TD.getTypeAllocSizeInBits(LoadTy),
+ AS);
} else
return 0;
@@ -408,10 +420,11 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
}
unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
- if (BytesLoaded > 32 || BytesLoaded == 0) return 0;
+ if (BytesLoaded > 32 || BytesLoaded == 0)
+ return 0;
GlobalValue *GVal;
- APInt Offset(TD.getPointerSizeInBits(), 0);
+ APInt Offset;
if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
return 0;
@@ -422,7 +435,8 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
// If we're loading off the beginning of the global, some bytes may be valid,
// but we don't try to handle this.
- if (Offset.isNegative()) return 0;
+ if (Offset.isNegative())
+ return 0;
// If we're not accessing anything in this constant, the result is undefined.
if (Offset.getZExtValue() >=
@@ -439,7 +453,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
ResultVal = RawBytes[BytesLoaded - 1];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
- ResultVal |= RawBytes[BytesLoaded-1-i];
+ ResultVal |= RawBytes[BytesLoaded - 1 - i];
}
} else {
ResultVal = RawBytes[0];
@@ -464,14 +478,17 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If the loaded value isn't a constant expr, we can't handle it.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
- if (!CE) return 0;
+ if (!CE)
+ return 0;
if (CE->getOpcode() == Instruction::GetElementPtr) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
+ if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (Constant *V =
ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE))
return V;
+ }
+ }
}
// Instead of loading constant c string, use corresponding integer value
@@ -576,13 +593,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
// constant. This happens frequently when iterating over a global array.
if (Opc == Instruction::Sub && DL) {
GlobalValue *GV1, *GV2;
- unsigned PtrSize = DL->getPointerSizeInBits();
- unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
- APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0);
+ APInt Offs1, Offs2;
if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL))
if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) &&
GV1 == GV2) {
+ unsigned OpSize = DL->getTypeSizeInBits(Op0->getType());
+
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
// PtrToInt may change the bitwidth so we have convert to the right size
// first.
@@ -600,15 +617,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
- if (!TD) return 0;
- Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
+ if (!TD)
+ return 0;
+
+ Type *IntPtrTy = TD->getIntPtrType(ResultTy);
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
- !isa<StructType>(GetElementPtrInst::getIndexedType(Ops[0]->getType(),
- Ops.slice(1, i-1)))) &&
+ !isa<StructType>(GetElementPtrInst::getIndexedType(
+ Ops[0]->getType(),
+ Ops.slice(1, i - 1)))) &&
Ops[i]->getType() != IntPtrTy) {
Any = true;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
@@ -619,13 +639,16 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
} else
NewIdxs.push_back(Ops[i]);
}
- if (!Any) return 0;
- Constant *C =
- ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ if (!Any)
+ return 0;
+
+ Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
+ }
+
return C;
}
@@ -640,7 +663,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) {
if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
NewPtrTy = NewPtrTy->getElementType()->getPointerTo(
OldPtrTy->getAddressSpace());
- Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy);
+ Ptr = ConstantExpr::getPointerCast(Ptr, NewPtrTy);
}
return Ptr;
}
@@ -651,11 +674,12 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
return 0;
- Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
+ Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
+ Type *ResultElementTy = ResultTy->getPointerElementType();
// If this is a constant expr gep that is effectively computing an
// "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
@@ -664,8 +688,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If this is "gep i8* Ptr, (sub 0, V)", fold this as:
// "inttoptr (sub (ptrtoint Ptr), V)"
- if (Ops.size() == 2 &&
- cast<PointerType>(ResultTy)->getElementType()->isIntegerTy(8)) {
+ if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
assert((CE == 0 || CE->getType() == IntPtrTy) &&
"CastGEPIndices didn't canonicalize index types!");
@@ -692,7 +715,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If this is a GEP of a GEP, fold it all into a single GEP.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+ SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
// Do not try the incorporate the sub-GEP if some index is not a number.
bool AllConstantInt = true;
@@ -713,12 +736,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// If the base value for this address is a literal integer value, fold the
// getelementptr to the resulting integer value casted to the pointer type.
APInt BasePtr(BitWidth, 0);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- if (CE->getOpcode() == Instruction::IntToPtr)
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (CE->getOpcode() == Instruction::IntToPtr) {
if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
BasePtr = Base->getValue().zextOrTrunc(BitWidth);
+ }
+ }
+
if (Ptr->isNullValue() || BasePtr != 0) {
- Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
return ConstantExpr::getIntToPtr(C, ResultTy);
}
@@ -728,7 +754,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Also, this helps GlobalOpt do SROA on GlobalVariables.
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
- SmallVector<Constant*, 32> NewIdxs;
+ SmallVector<Constant *, 32> NewIdxs;
+
do {
if (SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
if (ATy->isPointerTy()) {
@@ -743,7 +770,6 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Determine which element of the array the offset points into.
APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
- IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext());
if (ElemSize == 0)
// The element size is 0. This may be [0 x Ty]*, so just use a zero
// index for this level and proceed to the next level to see if it can
@@ -778,7 +804,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// We've reached some non-indexable type.
break;
}
- } while (Ty != cast<PointerType>(ResultTy)->getElementType());
+ } while (Ty != ResultElementTy);
// If we haven't used up the entire offset by descending the static
// type, then the offset is pointing into the middle of an indivisible
@@ -787,14 +813,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return 0;
// Create a GEP.
- Constant *C =
- ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
- assert(cast<PointerType>(C->getType())->getElementType() == Ty &&
+ Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
+ assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
- if (Ty != cast<PointerType>(ResultTy)->getElementType())
+ if (Ty != ResultElementTy)
C = FoldBitCast(C, ResultTy, *TD);
return C;
@@ -867,16 +892,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
- if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I))
+ if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
return ConstantExpr::getInsertValue(
cast<Constant>(IVI->getAggregateOperand()),
cast<Constant>(IVI->getInsertedValueOperand()),
IVI->getIndices());
+ }
- if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I)) {
return ConstantExpr::getExtractValue(
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
+ }
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
@@ -930,9 +957,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
- if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
+ if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1])) {
if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
return C;
+ }
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
}
@@ -953,10 +981,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
if (TD && CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
unsigned InWidth = Input->getType()->getScalarSizeInBits();
- if (TD->getPointerSizeInBits() < InWidth) {
+ unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType());
+ if (PtrWidth < InWidth) {
Constant *Mask =
- ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
- TD->getPointerSizeInBits()));
+ ConstantInt::get(CE->getContext(),
+ APInt::getLowBitsSet(InWidth, PtrWidth));
Input = ConstantExpr::getAnd(Input, Mask);
}
// Do a zext or trunc to get to the dest size.
@@ -966,13 +995,22 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::IntToPtr:
// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
- // the int size is >= the ptr size. This requires knowing the width of a
- // pointer, so it can't be done in ConstantExpr::getCast.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
- if (TD &&
- TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
- CE->getOpcode() == Instruction::PtrToInt)
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ // the int size is >= the ptr size and the address spaces are the same.
+ // This requires knowing the width of a pointer, so it can't be done in
+ // ConstantExpr::getCast.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (TD && CE->getOpcode() == Instruction::PtrToInt) {
+ Constant *SrcPtr = CE->getOperand(0);
+ unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType());
+ unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
+
+ if (MidIntSize >= SrcPtrSize) {
+ unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
+ if (SrcAS == DestTy->getPointerAddressSpace())
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+ }
+ }
+ }
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::Trunc:
@@ -984,6 +1022,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::SIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI:
+ case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::BitCast:
if (TD)
@@ -1024,8 +1063,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// around to know if bit truncation is happening.
if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
if (TD && Ops1->isNullValue()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1036,19 +1075,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if (CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy) {
- Constant *C = CE0->getOperand(0);
- Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy) {
+ Constant *C = CE0->getOperand(0);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
+ }
}
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
-
if (CE0->getOpcode() == Instruction::IntToPtr) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getType());
+
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
@@ -1060,11 +1101,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
- if ((CE0->getOpcode() == Instruction::PtrToInt &&
- CE0->getType() == IntPtrTy &&
- CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
- return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD, TLI);
+ if (CE0->getOpcode() == Instruction::PtrToInt) {
+ Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType());
+ if (CE0->getType() == IntPtrTy &&
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
+ return ConstantFoldCompareInstOperands(Predicate,
+ CE0->getOperand(0),
+ CE1->getOperand(0),
+ TD,
+ TLI);
+ }
+ }
}
}
@@ -1101,7 +1148,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
// addressing.
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
C = C->getAggregateElement(CE->getOperand(i));
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
}
return C;
}
@@ -1116,7 +1164,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// addressing.
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
C = C->getAggregateElement(Indices[i]);
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
}
return C;
}
@@ -1128,8 +1177,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
/// canConstantFoldCallTo - Return true if its even possible to fold a call to
/// the specified function.
-bool
-llvm::canConstantFoldCallTo(const Function *F) {
+bool llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::fabs:
case Intrinsic::log:
@@ -1167,7 +1215,8 @@ llvm::canConstantFoldCallTo(const Function *F) {
case 0: break;
}
- if (!F->hasName()) return false;
+ if (!F->hasName())
+ return false;
StringRef Name = F->getName();
// In these cases, the check of the length is required. We don't want to
@@ -1250,7 +1299,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
static Constant *ConstantFoldConvertToInt(const APFloat &Val,
bool roundTowardZero, Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64bits.
- unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
+ unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
"Can only constant fold conversions to 64 and 32 bit ints");
@@ -1271,7 +1320,8 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val,
Constant *
llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
- if (!F->hasName()) return 0;
+ if (!F->hasName())
+ return 0;
StringRef Name = F->getName();
Type *Ty = F->getReturnType();
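
A worked example, illustrative only, of the `(&GV+C1) - (&GV+C2) -> C1-C2` fold in SymbolicallyEvaluateBinop, whose offset widths the hunks above now derive from the pointer type itself:

    ; With 64-bit pointers, both operands are @a plus constant byte offsets
    ; (12 and 4), so the whole expression constant-folds to i64 8.
    ;   sub (i64 ptrtoint (i32* getelementptr ([5 x i32]* @a, i64 0, i64 3) to i64),
    ;        i64 ptrtoint (i32* getelementptr ([5 x i32]* @a, i64 0, i64 1) to i64))
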
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 927508e..f943258 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -19,6 +19,7 @@
#define CM_NAME "cost-model"
#define DEBUG_TYPE CM_NAME
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
@@ -26,10 +27,15 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
+ cl::Hidden,
+ cl::desc("Recognize reduction patterns."));
+
namespace {
class CostModelAnalysis : public FunctionPass {
@@ -105,6 +111,260 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
return OpInfo;
}
+static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) {
+ if (M1.size() != M2.size())
+ return false;
+
+ for (unsigned i = 0, e = M1.size(); i != e; ++i)
+ if (M1[i] != M2[i])
+ return false;
+
+ return true;
+}
+
+static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
+ unsigned Level) {
+ // We don't need a shuffle if we just want to have element 0 in position 0 of
+ // the vector.
+ if (!SI && Level == 0 && IsLeft)
+ return true;
+ else if (!SI)
+ return false;
+
+ SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
+
+ // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
+ // we look at the left or right side.
+ for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
+ Mask[i] = val;
+
+ SmallVector<int, 16> ActualMask = SI->getShuffleMask();
+ if (!matchMask(Mask, ActualMask))
+ return false;
+
+ return true;
+}
+
+static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
+ unsigned Level, unsigned NumLevels) {
+ // Match one level of pairwise operations.
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ if (BinOp == 0)
+ return false;
+
+ assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
+
+ unsigned Opcode = BinOp->getOpcode();
+ Value *L = BinOp->getOperand(0);
+ Value *R = BinOp->getOperand(1);
+
+ ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L);
+ if (!LS && Level)
+ return false;
+ ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R);
+ if (!RS && Level)
+ return false;
+
+ // On level 0 we can omit one shufflevector instruction.
+ if (!Level && !RS && !LS)
+ return false;
+
+ // Shuffle inputs must match.
+ Value *NextLevelOpL = LS ? LS->getOperand(0) : 0;
+ Value *NextLevelOpR = RS ? RS->getOperand(0) : 0;
+ Value *NextLevelOp = 0;
+ if (NextLevelOpR && NextLevelOpL) {
+ // If we have two shuffles their operands must match.
+ if (NextLevelOpL != NextLevelOpR)
+ return false;
+
+ NextLevelOp = NextLevelOpL;
+ } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
+ // On the first level we can omit the shufflevector <0, undef,...>. So the
+ // input to the other shufflevector <1, undef> must match with one of the
+ // inputs to the current binary operation.
+ // Example:
+ // %NextLevelOpL = shufflevector %R, <1, undef ...>
+ // %BinOp = fadd %NextLevelOpL, %R
+ if (NextLevelOpL && NextLevelOpL != R)
+ return false;
+ else if (NextLevelOpR && NextLevelOpR != L)
+ return false;
+
+ NextLevelOp = NextLevelOpL ? R : L;
+ } else
+ return false;
+
+ // Check that the next levels binary operation exists and matches with the
+ // current one.
+ BinaryOperator *NextLevelBinOp = 0;
+ if (Level + 1 != NumLevels) {
+ if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
+ return false;
+ else if (NextLevelBinOp->getOpcode() != Opcode)
+ return false;
+ }
+
+ // Shuffle mask for pairwise operation must match.
+ if (matchPairwiseShuffleMask(LS, true, Level)) {
+ if (!matchPairwiseShuffleMask(RS, false, Level))
+ return false;
+ } else if (matchPairwiseShuffleMask(RS, true, Level)) {
+ if (!matchPairwiseShuffleMask(LS, false, Level))
+ return false;
+ } else
+ return false;
+
+ if (++Level == NumLevels)
+ return true;
+
+ // Match next level.
+ return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels);
+}
+
+static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+ // We look for a sequence of shuffle,shuffle,add triples like the following
+ // that builds a pairwise reduction tree.
+ //
+ // (X0, X1, X2, X3)
+ // (X0 + X1, X2 + X3, undef, undef)
+ // ((X0 + X1) + (X2 + X3), undef, undef, undef)
+ //
+ // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+ // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+ // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+ if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)))
+ return false;
+
+ Opcode = RdxStart->getOpcode();
+ Ty = VecTy;
+
+ return true;
+}
+
+static std::pair<Value *, ShuffleVectorInst *>
+getShuffleAndOtherOprd(BinaryOperator *B) {
+
+ Value *L = B->getOperand(0);
+ Value *R = B->getOperand(1);
+ ShuffleVectorInst *S = 0;
+
+ if ((S = dyn_cast<ShuffleVectorInst>(L)))
+ return std::make_pair(R, S);
+
+ S = dyn_cast<ShuffleVectorInst>(R);
+ return std::make_pair(L, S);
+}
+
+static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, Type *&Ty) {
+ if (!EnableReduxCost)
+ return false;
+
+ // Need to extract the first element.
+ ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+ unsigned Idx = ~0u;
+ if (CI)
+ Idx = CI->getZExtValue();
+ if (Idx != 0)
+ return false;
+
+ BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0));
+ if (!RdxStart)
+ return false;
+ unsigned RdxOpcode = RdxStart->getOpcode();
+
+ Type *VecTy = ReduxRoot->getOperand(0)->getType();
+ unsigned NumVecElems = VecTy->getVectorNumElements();
+ if (!isPowerOf2_32(NumVecElems))
+ return false;
+
+  // We look for a sequence of shuffles and adds like the following, matching
+  // one fadd/shufflevector pair at a time.
+ //
+ // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
+ // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+ // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+ // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+ // %r = extractelement <4 x float> %bin.rdx8, i32 0
+
+ unsigned MaskStart = 1;
+ Value *RdxOp = RdxStart;
+ SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
+ unsigned NumVecElemsRemain = NumVecElems;
+  while (NumVecElemsRemain > 1) {
+ // Check for the right reduction operation.
+ BinaryOperator *BinOp;
+ if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp)))
+ return false;
+ if (BinOp->getOpcode() != RdxOpcode)
+ return false;
+
+ Value *NextRdxOp;
+ ShuffleVectorInst *Shuffle;
+ tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
+
+    // Check that the current reduction operation and the shuffle use the same
+    // value.
+ if (Shuffle == 0)
+ return false;
+ if (Shuffle->getOperand(0) != NextRdxOp)
+ return false;
+
+    // Check that the shuffle mask matches.
+ for (unsigned j = 0; j != MaskStart; ++j)
+ ShuffleMask[j] = MaskStart + j;
+ // Fill the rest of the mask with -1 for undef.
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
+
+ SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+ if (!matchMask(ShuffleMask, Mask))
+ return false;
+
+ RdxOp = NextRdxOp;
+ NumVecElemsRemain /= 2;
+ MaskStart *= 2;
+ }
+
+ Opcode = RdxOpcode;
+ Ty = VecTy;
+ return true;
+}
+
unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
if (!TTI)
return -1;
@@ -189,6 +449,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
unsigned Idx = -1;
if (CI)
Idx = CI->getZExtValue();
+
+    // Try to match a reduction sequence (a series of shufflevector and vector
+    // adds followed by an extractelement).
+ unsigned ReduxOpCode;
+ Type *ReduxType;
+
+ if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, false);
+ else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType))
+ return TTI->getReductionCost(ReduxOpCode, ReduxType, true);
+
return TTI->getVectorInstrCost(I->getOpcode(),
EEI->getOperand(0)->getType(), Idx);
}
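For reference, here is a complete IR function that the vector-splitting matcher above accepts, assembled from the shapes sketched in its comments (an illustrative sample, not part of this patch; the RUN line assumes the cl::opt behind EnableReduxCost, defined earlier in CostModel.cpp and not visible in this hunk, is named costmodel-reduxcost):

    ; RUN: opt < %s -cost-model -analyze -costmodel-reduxcost=true
    define float @splitting_reduction(<4 x float> %rdx) {
      %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
                                <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
      %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
      %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
                                 <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
      %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
      %r = extractelement <4 x float> %bin.rdx8, i32 0
      ret float %r
    }

With the matcher engaged, getInstructionCost prices %r as TTI->getReductionCost(Instruction::FAdd, <4 x float>, /*IsPairwise=*/false) rather than as a plain extractelement; the pairwise matcher does the same with IsPairwise=true for the shuffle/shuffle/fadd tree shown in matchPairwiseReduction's comment.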
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
new file mode 100644
index 0000000..3ed0609
--- /dev/null
+++ b/lib/Analysis/Delinearization.cpp
@@ -0,0 +1,133 @@
+//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements an analysis pass that tries to delinearize all GEP
+// instructions in all loops using the SCEV analysis functionality. This pass is
+// only used for testing purposes: if your pass needs delinearization, please
+// use the on-demand SCEVAddRecExpr::delinearize() function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DL_NAME "delinearize"
+#define DEBUG_TYPE DL_NAME
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+class Delinearization : public FunctionPass {
+ Delinearization(const Delinearization &); // do not implement
+protected:
+ Function *F;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ Delinearization() : FunctionPass(ID) {
+ initializeDelinearizationPass(*PassRegistry::getPassRegistry());
+ }
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void print(raw_ostream &O, const Module *M = 0) const;
+};
+
+} // end anonymous namespace
+
+void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+}
+
+bool Delinearization::runOnFunction(Function &F) {
+ this->F = &F;
+ SE = &getAnalysis<ScalarEvolution>();
+ LI = &getAnalysis<LoopInfo>();
+ return false;
+}
+
+static Value *getPointerOperand(Instruction &Inst) {
+ if (LoadInst *Load = dyn_cast<LoadInst>(&Inst))
+ return Load->getPointerOperand();
+ else if (StoreInst *Store = dyn_cast<StoreInst>(&Inst))
+ return Store->getPointerOperand();
+ else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst))
+ return Gep->getPointerOperand();
+ return NULL;
+}
+
+void Delinearization::print(raw_ostream &O, const Module *) const {
+ O << "Delinearization on function " << F->getName() << ":\n";
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ Instruction *Inst = &(*I);
+
+ // Only analyze loads and stores.
+ if (!isa<StoreInst>(Inst) && !isa<LoadInst>(Inst) &&
+ !isa<GetElementPtrInst>(Inst))
+ continue;
+
+ const BasicBlock *BB = Inst->getParent();
+ // Delinearize the memory access as analyzed in all the surrounding loops.
+ // Do not analyze memory accesses outside loops.
+ for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) {
+ const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
+
+ // Do not try to delinearize memory accesses that are not AddRecs.
+ if (!AR)
+ break;
+
+ O << "AddRec: " << *AR << "\n";
+
+ SmallVector<const SCEV *, 3> Subscripts, Sizes;
+ const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes);
+ int Size = Subscripts.size();
+ if (Res == AR || Size == 0) {
+ O << "failed to delinearize\n";
+ continue;
+ }
+ O << "Base offset: " << *Res << "\n";
+ O << "ArrayDecl[UnknownSize]";
+ for (int i = 0; i < Size - 1; i++)
+ O << "[" << *Sizes[i] << "]";
+ O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
+
+ O << "ArrayRef";
+ for (int i = 0; i < Size; i++)
+ O << "[" << *Subscripts[i] << "]";
+ O << "\n";
+ }
+ }
+}
+
+char Delinearization::ID = 0;
+static const char delinearization_name[] = "Delinearization";
+INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true,
+ true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true)
+
+FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; }
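To make the printer's output format concrete, consider a linearized two-dimensional access (a hedged sketch; the exact SCEV spellings depend on how the front end lowers the loop nest):

    // C++ source compiled to IR and fed to 'opt -delinearize -analyze':
    void set(long n, long m, double *A) {
      for (long i = 0; i < n; ++i)
        for (long j = 0; j < m; ++j)
          A[i * m + j] = 1.0;   // one linearized subscript
    }

For the store, print() would emit something along these lines, recovering both subscripts and the row size %m from the single access function:

    AddRec: {{%A,+,(8 * %m)}<%for.i>,+,8}<%for.j>
    Base offset: %A
    ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
    ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>]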
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index a0f1a69..3b3e2ef 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -24,11 +24,11 @@
// Both of these are conservative weaknesses;
// that is, not a source of correctness problems.
//
-// The implementation depends on the GEP instruction to
-// differentiate subscripts. Since Clang linearizes subscripts
-// for most arrays, we give up some precision (though the existing MIV tests
-// will help). We trust that the GEP instruction will eventually be extended.
-// In the meantime, we should explore Maslov's ideas about delinearization.
+// The implementation depends on the GEP instruction to differentiate
+// subscripts. Since Clang linearizes some array subscripts, the dependence
+// analysis uses SCEV->delinearize to recover the representation of multiple
+// subscripts, and thus avoids the more expensive and less precise MIV tests.
+// The delinearization is controlled by the flag -da-delinearize.
//
// We should pay some careful attention to the possibility of integer overflow
// in the implementation of the various tests. This could happen with Add,
@@ -61,6 +61,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
@@ -104,6 +105,10 @@ STATISTIC(BanerjeeApplications, "Banerjee applications");
STATISTIC(BanerjeeIndependence, "Banerjee independence");
STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+static cl::opt<bool>
+Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Try to delinearize array references."));
+
//===----------------------------------------------------------------------===//
// basics
@@ -3171,6 +3176,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
llvm_unreachable("constraint has unexpected kind");
}
+/// Check if we can delinearize the subscripts. If the SCEVs representing the
+/// source and destination array references are recurrences on a nested loop,
+/// this function flattens the nested recurrences into separate recurrences
+/// for each loop level.
+bool
+DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
+ SmallVectorImpl<Subscript> &Pair) const {
+ const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
+ const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
+ if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
+ return false;
+
+ SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
+ SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
+ DstAR->delinearize(*SE, DstSubscripts, DstSizes);
+
+ int size = SrcSubscripts.size();
+ int dstSize = DstSubscripts.size();
+ if (size != dstSize || size < 2)
+ return false;
+
+#ifndef NDEBUG
+ DEBUG(errs() << "\nSrcSubscripts: ");
+ for (int i = 0; i < size; i++)
+ DEBUG(errs() << *SrcSubscripts[i]);
+ DEBUG(errs() << "\nDstSubscripts: ");
+ for (int i = 0; i < size; i++)
+ DEBUG(errs() << *DstSubscripts[i]);
+#endif
+
+ // The delinearization transforms a single-subscript MIV dependence test into
+ // a multi-subscript SIV dependence test that is easier to compute. So we
+ // resize Pair to contain as many pairs of subscripts as the delinearization
+ // has found, and then initialize the pairs following the delinearization.
+ Pair.resize(size);
+ for (int i = 0; i < size; ++i) {
+ Pair[i].Src = SrcSubscripts[i];
+ Pair[i].Dst = DstSubscripts[i];
+
+    // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the
+    // delinearization has found, and add these constraints to the dependence
+    // check to avoid memory accesses overflowing from one dimension into
+    // another. This is related to the problem of determining the existence of
+    // data dependences in array accesses using a different number of
+    // subscripts: in C one can access an array A[100][100]; as A[0][9999],
+    // *A[9999], etc.
+ }
+
+ return true;
+}
//===----------------------------------------------------------------------===//
@@ -3280,6 +3334,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
Pair[0].Dst = DstSCEV;
}
+ if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+    DEBUG(dbgs() << " delinearized GEP\n");
+ Pairs = Pair.size();
+ }
+
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
@@ -3698,6 +3758,12 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
Pair[0].Dst = DstSCEV;
}
+ if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+    DEBUG(dbgs() << " delinearized GEP\n");
+ Pairs = Pair.size();
+ }
+
for (unsigned P = 0; P < Pairs; ++P) {
Pair[P].Loops.resize(MaxLevels + 1);
Pair[P].GroupLoops.resize(MaxLevels + 1);
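Concretely (a sketch following the FIXME's notation, not output from this patch): for two accesses A[i*m + j] in a doubly nested loop, tryDelinearize replaces the single linearized MIV subscript pair with one SIV pair per loop level, which the cheaper and more precise SIV tests then handle:

    before:  Pair[0] = ( {{%A,+,(8*%m)}<i>,+,8}<j>, {{%A,+,(8*%m)}<i>,+,8}<j> )
    after:   Pair[0] = ( {0,+,1}<i>, {0,+,1}<i> )   // row subscripts
             Pair[1] = ( {0,+,1}<j>, {0,+,1}<j> )   // column subscripts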
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 7620fd9..f042964 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -6,11 +6,6 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements the CallGraph class and provides the BasicCallGraph
-// default implementation.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Instructions.h"
@@ -21,168 +16,92 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
+CallGraph::CallGraph()
+ : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) {
+ initializeCallGraphPass(*PassRegistry::getPassRegistry());
+}
-//===----------------------------------------------------------------------===//
-// BasicCallGraph class definition
-//
-class BasicCallGraph : public ModulePass, public CallGraph {
- // Root is root of the call graph, or the external node if a 'main' function
- // couldn't be found.
- //
- CallGraphNode *Root;
-
- // ExternalCallingNode - This node has edges to all external functions and
- // those internal functions that have their address taken.
- CallGraphNode *ExternalCallingNode;
-
- // CallsExternalNode - This node has edges to it from all functions making
- // indirect calls or calling an external function.
- CallGraphNode *CallsExternalNode;
-
-public:
- static char ID; // Class identification, replacement for typeinfo
- BasicCallGraph() : ModulePass(ID), Root(0),
- ExternalCallingNode(0), CallsExternalNode(0) {
- initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
- }
+void CallGraph::addToCallGraph(Function *F) {
+ CallGraphNode *Node = getOrInsertFunction(F);
- // runOnModule - Compute the call graph for the specified module.
- virtual bool runOnModule(Module &M) {
- CallGraph::initialize(M);
-
- ExternalCallingNode = getOrInsertFunction(0);
- CallsExternalNode = new CallGraphNode(0);
- Root = 0;
-
- // Add every function to the call graph.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- addToCallGraph(I);
-
- // If we didn't find a main function, use the external call graph node
- if (Root == 0) Root = ExternalCallingNode;
-
- return false;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
+ // If this function has external linkage, anything could call it.
+ if (!F->hasLocalLinkage()) {
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
- virtual void print(raw_ostream &OS, const Module *) const {
- OS << "CallGraph Root is: ";
- if (Function *F = getRoot()->getFunction())
- OS << F->getName() << "\n";
- else {
- OS << "<<null function: 0x" << getRoot() << ">>\n";
+ // Found the entry point?
+ if (F->getName() == "main") {
+ if (Root) // Found multiple external mains? Don't pick one.
+ Root = ExternalCallingNode;
+ else
+ Root = Node; // Found a main, keep track of it!
}
-
- CallGraph::print(OS, 0);
}
- virtual void releaseMemory() {
- destroy();
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it should
- /// override this to adjust the this pointer as needed for the specified pass
- /// info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &CallGraph::ID)
- return (CallGraph*)this;
- return this;
- }
-
- CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
- CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
-
- // getRoot - Return the root of the call graph, which is either main, or if
- // main cannot be found, the external node.
- //
- CallGraphNode *getRoot() { return Root; }
- const CallGraphNode *getRoot() const { return Root; }
-
-private:
- //===---------------------------------------------------------------------
- // Implementation of CallGraph construction
- //
-
- // addToCallGraph - Add a function to the call graph, and link the node to all
- // of the functions that it calls.
- //
- void addToCallGraph(Function *F) {
- CallGraphNode *Node = getOrInsertFunction(F);
-
- // If this function has external linkage, anything could call it.
- if (!F->hasLocalLinkage()) {
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // Found the entry point?
- if (F->getName() == "main") {
- if (Root) // Found multiple external mains? Don't pick one.
- Root = ExternalCallingNode;
- else
- Root = Node; // Found a main, keep track of it!
+ // If this function has its address taken, anything could call it.
+ if (F->hasAddressTaken())
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+ // If this function is not defined in this translation unit, it could call
+ // anything.
+ if (F->isDeclaration() && !F->isIntrinsic())
+ Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+ // Look for calls by this function.
+ for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ CallSite CS(cast<Value>(II));
+ if (CS) {
+ const Function *Callee = CS.getCalledFunction();
+ if (!Callee)
+ // Indirect calls of intrinsics are not allowed so no need to check.
+ Node->addCalledFunction(CS, CallsExternalNode);
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, getOrInsertFunction(Callee));
}
}
+}
- // If this function has its address taken, anything could call it.
- if (F->hasAddressTaken())
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
-
- // If this function is not defined in this translation unit, it could call
- // anything.
- if (F->isDeclaration() && !F->isIntrinsic())
- Node->addCalledFunction(CallSite(), CallsExternalNode);
-
- // Look for calls by this function.
- for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
- II != IE; ++II) {
- CallSite CS(cast<Value>(II));
- if (CS) {
- const Function *Callee = CS.getCalledFunction();
- if (!Callee)
- // Indirect calls of intrinsics are not allowed so no need to check.
- Node->addCalledFunction(CS, CallsExternalNode);
- else if (!Callee->isIntrinsic())
- Node->addCalledFunction(CS, getOrInsertFunction(Callee));
- }
- }
- }
+void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
- //
- // destroy - Release memory for the call graph
- virtual void destroy() {
- /// CallsExternalNode is not in the function map, delete it explicitly.
- if (CallsExternalNode) {
- CallsExternalNode->allReferencesDropped();
- delete CallsExternalNode;
- CallsExternalNode = 0;
- }
- CallGraph::destroy();
- }
-};
+bool CallGraph::runOnModule(Module &M) {
+ Mod = &M;
-} //End anonymous namespace
+ ExternalCallingNode = getOrInsertFunction(0);
+ assert(!CallsExternalNode);
+ CallsExternalNode = new CallGraphNode(0);
+ Root = 0;
-INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
-INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
- "Basic CallGraph Construction", false, true, true)
+ // Add every function to the call graph.
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+ addToCallGraph(I);
-char CallGraph::ID = 0;
-char BasicCallGraph::ID = 0;
+ // If we didn't find a main function, use the external call graph node
+ if (Root == 0)
+ Root = ExternalCallingNode;
-void CallGraph::initialize(Module &M) {
- Mod = &M;
+ return false;
}
-void CallGraph::destroy() {
- if (FunctionMap.empty()) return;
-
- // Reset all node's use counts to zero before deleting them to prevent an
- // assertion from firing.
+INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true)
+
+char CallGraph::ID = 0;
+
+void CallGraph::releaseMemory() {
+ /// CallsExternalNode is not in the function map, delete it explicitly.
+ if (CallsExternalNode) {
+ CallsExternalNode->allReferencesDropped();
+ delete CallsExternalNode;
+ CallsExternalNode = 0;
+ }
+
+ if (FunctionMap.empty())
+ return;
+
+  // Reset all nodes' use counts to zero before deleting them to prevent an
+  // assertion from firing.
#ifndef NDEBUG
for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
I != E; ++I)
@@ -195,7 +114,14 @@ void CallGraph::destroy() {
FunctionMap.clear();
}
-void CallGraph::print(raw_ostream &OS, Module*) const {
+void CallGraph::print(raw_ostream &OS, const Module*) const {
+ OS << "CallGraph Root is: ";
+ if (Function *F = Root->getFunction())
+ OS << F->getName() << "\n";
+ else {
+ OS << "<<null function: 0x" << Root << ">>\n";
+ }
+
for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
I->second->print(OS);
}
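Since CallGraph is now a concrete ModulePass rather than an analysis group with a BasicCallGraph default, clients require and fetch it like any other pass; GlobalsModRef's switch to INITIALIZE_PASS_DEPENDENCY below is the same change on the producer side. A minimal client sketch against the patched API (hypothetical pass name):

    #include "llvm/Analysis/CallGraph.h"
    #include "llvm/Pass.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    namespace {
    struct CGConsumer : public ModulePass {
      static char ID;
      CGConsumer() : ModulePass(ID) {}
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.setPreservesAll();
        AU.addRequired<CallGraph>();  // a plain pass dependency now, no AG machinery
      }
      virtual bool runOnModule(Module &M) {
        CallGraph &CG = getAnalysis<CallGraph>();
        CG.print(errs(), &M);         // note the new const Module* signature
        return false;
      }
    };
    }
    char CGConsumer::ID = 0;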
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index a0d788f..182beca 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -22,7 +22,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/PassManagers.h"
+#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 92d0d23..7ec4644 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -189,7 +189,7 @@ char GlobalsModRef::ID = 0;
INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
-INITIALIZE_AG_DEPENDENCY(CallGraph)
+INITIALIZE_PASS_DEPENDENCY(CallGraph)
INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis,
"globalsmodref-aa", "Simple mod/ref analysis for globals",
false, true, false)
diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp
index 1c1816d..47357cf 100644
--- a/lib/Analysis/IPA/IPA.cpp
+++ b/lib/Analysis/IPA/IPA.cpp
@@ -19,8 +19,7 @@ using namespace llvm;
/// initializeIPA - Initialize all passes linked into the IPA library.
void llvm::initializeIPA(PassRegistry &Registry) {
- initializeBasicCallGraphPass(Registry);
- initializeCallGraphAnalysisGroup(Registry);
+ initializeCallGraphPass(Registry);
initializeCallGraphPrinterPass(Registry);
initializeCallGraphViewerPass(Registry);
initializeFindUsedTypesPass(Registry);
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 37d73a8..3bc796e 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
bool ContainsNoDuplicateCall;
+ bool HasReturn;
+ bool HasIndirectBr;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
@@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitExtractValue(ExtractValueInst &I);
bool visitInsertValue(InsertValueInst &I);
bool visitCallSite(CallSite CS);
+ bool visitReturnInst(ReturnInst &RI);
+ bool visitBranchInst(BranchInst &BI);
+ bool visitSwitchInst(SwitchInst &SI);
+ bool visitIndirectBrInst(IndirectBrInst &IBI);
+ bool visitResumeInst(ResumeInst &RI);
+ bool visitUnreachableInst(UnreachableInst &I);
public:
CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI,
@@ -139,12 +147,13 @@ public:
: TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -704,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
}
bool CallAnalyzer::visitCallSite(CallSite CS) {
- if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ if (CS.hasFnAttr(Attribute::ReturnsTwice) &&
!F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
@@ -785,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
return Base::visitCallSite(CS);
}
+bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
+ // At least one return instruction will be free after inlining.
+ bool Free = !HasReturn;
+ HasReturn = true;
+ return Free;
+}
+
+bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
+ // We model unconditional branches as essentially free -- they really
+ // shouldn't exist at all, but handling them makes the behavior of the
+ // inliner more regular and predictable. Interestingly, conditional branches
+ // which will fold away are also free.
+ return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(BI.getCondition()));
+}
+
+bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
+ // We model unconditional switches as free, see the comments on handling
+ // branches.
+ return isa<ConstantInt>(SI.getCondition()) ||
+ dyn_cast_or_null<ConstantInt>(
+ SimplifiedValues.lookup(SI.getCondition()));
+}
+
+bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
+  // We never want to inline functions that contain an indirectbr. Inlining
+  // would be incorrect because all the blockaddresses (in static global
+  // initializers for example) would still refer to the original function, and
+  // the indirect jump would jump from the inlined copy of the function into
+  // the original function, which is extremely undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions with
+ // indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably don't
+ // want to inline this function.
+ HasIndirectBr = true;
+ return false;
+}
+
+bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {
+ // FIXME: It's not clear that a single instruction is an accurate model for
+ // the inline cost of a resume instruction.
+ return false;
+}
+
+bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {
+  // FIXME: It might be reasonable to discount the cost of instructions leading
+  // to unreachable as they have the lowest possible impact on both runtime and
+  // code size.
+ return true; // No actual code is needed for unreachable.
+}
+
bool CallAnalyzer::visitInstruction(Instruction &I) {
// Some instructions are free. All of the free intrinsics can also be
// handled by SROA, etc.
@@ -808,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
- for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
- I != E; ++I) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@@ -825,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
Cost += InlineConstants::InstrCost;
    // If visiting this instruction detected an uninlinable pattern, abort.
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -989,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
- // Track whether we've seen a return instruction. The first return
- // instruction is free, as at least one will usually disappear in inlining.
- bool HasReturn = false;
-
// Populate our simplified values by mapping from function arguments to call
// arguments with known important simplifications.
CallSite::arg_iterator CAI = CS.arg_begin();
@@ -1039,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
- // Handle the terminator cost here where we can track returns and other
- // function-wide constructs.
- TerminatorInst *TI = BB->getTerminator();
-
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this
- // indirect jump would jump from the inlined copy of the function into the
- // original function which is extremely undefined behavior.
- // FIXME: This logic isn't really right; we can safely inline functions
- // with indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably
- // don't want to inline this function.
- if (isa<IndirectBrInst>(TI))
- return false;
-
- if (!HasReturn && isa<ReturnInst>(TI))
- HasReturn = true;
- else
- Cost += InlineConstants::InstrCost;
-
// Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
if (!analyzeBlock(BB)) {
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
+ HasIndirectBr)
return false;
// If the caller is a recursive function then we don't want to inline
@@ -1078,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
break;
}
+ TerminatorInst *TI = BB->getTerminator();
+
// Add in the live successors by first checking whether we have terminator
// that may be simplified based on the values simplified by this call.
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -1171,6 +1210,22 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) {
return getInlineCost(CS, CS.getCalledFunction(), Threshold);
}
+/// \brief Test whether two functions either both have or both lack the given
+/// attribute.
+static bool attributeMatches(Function *F1, Function *F2,
+ Attribute::AttrKind Attr) {
+ return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr);
+}
+
+/// \brief Test that there are no attribute conflicts between Caller and Callee
+/// that prevent inlining.
+static bool functionsHaveCompatibleAttributes(Function *Caller,
+ Function *Callee) {
+ return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeMemory) &&
+ attributeMatches(Caller, Callee, Attribute::SanitizeThread);
+}
+
InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
int Threshold) {
// Cannot inline indirect calls.
@@ -1179,20 +1234,26 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
- if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline)) {
+ if (Callee->hasFnAttribute(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
return llvm::InlineCost::getAlways();
return llvm::InlineCost::getNever();
}
+ // Never inline functions with conflicting attributes (unless callee has
+ // always-inline attribute).
+ if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
+ return llvm::InlineCost::getNever();
+
+ // Don't inline this call if the caller has the optnone attribute.
+ if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
+ return llvm::InlineCost::getNever();
+
// Don't inline functions which can be redefined at link-time to mean
// something else. Don't inline functions marked noinline or call sites
// marked noinline.
if (Callee->mayBeOverridden() ||
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoInline) ||
- CS.isNoInline())
+ Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline())
return llvm::InlineCost::getNever();
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
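The new attribute-compatibility rule is easiest to see on a pair of functions that differ only in a sanitizer attribute (an illustrative IR sketch; attribute groups written inline for brevity):

    define void @callee() sanitize_address {
      ret void
    }

    define void @caller() {     ; caller is not ASan-instrumented
      call void @callee()       ; getInlineCost now returns 'never' here
      ret void
    }

Inlining @callee would splice ASan's shadow-memory checks into uninstrumented code (or, in the reverse direction, silently drop instrumentation), which is what functionsHaveCompatibleAttributes guards against.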
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b275dfe..b867af1 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -668,7 +668,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
/// folding.
static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
- Value *&V) {
+ Value *&V,
+ bool AllowNonInbounds = false) {
assert(V->getType()->getScalarType()->isPointerTy());
// Without DataLayout, just be conservative for now. Theoretically, more could
@@ -685,7 +686,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD,
Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset))
+ if ((!AllowNonInbounds && !GEP->isInBounds()) ||
+ !GEP->accumulateConstantOffset(*TD, Offset))
break;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
@@ -1737,7 +1739,7 @@ static Constant *computePointerICmp(const DataLayout *TD,
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
- if (llvm::isKnownNonNull(LHS) && isa<ConstantPointerNull>(RHS) &&
+ if (llvm::isKnownNonNull(LHS, TLI) && isa<ConstantPointerNull>(RHS) &&
(Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
@@ -1837,6 +1839,17 @@ static Constant *computePointerICmp(const DataLayout *TD,
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
}
+
+  // Even if a non-inbounds GEP occurs along the path we can still optimize
+  // equality comparisons concerning the result. We avoid walking the whole
+  // chain again by starting where the last calls to
+  // stripAndComputeConstantOffsets left off and accumulating the offsets.
+ Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true);
+ Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true);
+ if (LHS == RHS)
+ return ConstantExpr::getICmp(Pred,
+ ConstantExpr::getAdd(LHSOffset, LHSNoBound),
+ ConstantExpr::getAdd(RHSOffset, RHSNoBound));
}
// Otherwise, fail.
@@ -2946,6 +2959,7 @@ static bool IsIdempotent(Intrinsic::ID ID) {
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
+ case Intrinsic::round:
return true;
}
}
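As an example of the new folding (a sketch in the pre-3.5 getelementptr syntax used throughout this file):

    define i1 @gep_eq(i8* %p) {
      %a = getelementptr i8* %p, i64 1   ; deliberately not 'inbounds'
      %b = getelementptr i8* %p, i64 2
      %c = icmp eq i8* %a, %b            ; now folds to false: 1 != 2
      ret i1 %c
    }

Both pointers strip to the same base %p, so the comparison reduces to comparing the accumulated constant offsets; as the comment notes, this is only sound for equality-style predicates, where object bounds are irrelevant.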
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 9393508..ec17f47 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -207,7 +207,7 @@ void Lint::visitCallSite(CallSite CS) {
&I);
FunctionType *FT = F->getFunctionType();
- unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+ unsigned NumActualArgs = CS.arg_size();
Assert1(FT->isVarArg() ?
FT->getNumParams() <= NumActualArgs :
@@ -504,14 +504,42 @@ void Lint::visitShl(BinaryOperator &I) {
"Undefined result: Shift count out of range", &I);
}
-static bool isZero(Value *V, DataLayout *TD) {
+static bool isZero(Value *V, DataLayout *DL) {
// Assume undef could be zero.
- if (isa<UndefValue>(V)) return true;
+ if (isa<UndefValue>(V))
+ return true;
+
+ VectorType *VecTy = dyn_cast<VectorType>(V->getType());
+ if (!VecTy) {
+ unsigned BitWidth = V->getType()->getIntegerBitWidth();
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+ return KnownZero.isAllOnesValue();
+ }
+
+ // Per-component check doesn't work with zeroinitializer
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ if (C->isZeroValue())
+ return true;
+
+ // For a vector, KnownZero will only be true if all values are zero, so check
+ // this per component
+ unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth();
+ for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) {
+ Constant *Elem = C->getAggregateElement(I);
+ if (isa<UndefValue>(Elem))
+ return true;
+
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Elem, KnownZero, KnownOne, DL);
+ if (KnownZero.isAllOnesValue())
+ return true;
+ }
- unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, TD);
- return KnownZero.isAllOnesValue();
+ return false;
}
void Lint::visitSDiv(BinaryOperator &I) {
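A minimal case the vector path now catches (sketch):

    %r = sdiv <2 x i32> %x, <i32 7, i32 0>   ; lane 1 has a known-zero divisor,
                                             ; so isZero() returns true and Lint warns

The old code called cast<IntegerType> on the operand type, which asserts on vector types; the new code checks each constant lane individually and, as before, conservatively treats undef as possibly zero.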
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 142ebed..e369633 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -177,10 +177,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool Loop::isLCSSAForm(DominatorTree &DT) const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
-
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
BasicBlock *BB = *BI;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
@@ -196,7 +192,7 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
// block they are defined in. Also, blocks not reachable from the
// entry are special; uses in them don't need to go through PHIs.
if (UserBB != BB &&
- !LoopBBs.count(UserBB) &&
+ !contains(UserBB) &&
DT.isReachableFromEntry(UserBB))
return false;
}
@@ -220,12 +216,12 @@ bool Loop::isSafeToClone() const {
// Return false if any loop blocks contain indirectbrs, or there are any calls
// to noduplicate functions.
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
- if (isa<IndirectBrInst>((*I)->getTerminator())) {
+ if (isa<IndirectBrInst>((*I)->getTerminator()))
return false;
- } else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
+
+ if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
if (II->hasFnAttr(Attribute::NoDuplicate))
return false;
- }
for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
@@ -309,15 +305,15 @@ bool Loop::isAnnotatedParallel() const {
if (!II->mayReadOrWriteMemory())
continue;
- if (!II->getMetadata("llvm.mem.parallel_loop_access"))
- return false;
-
// The memory instruction can refer to the loop identifier metadata
// directly or indirectly through another list metadata (in case of
// nested parallel loops). The loop identifier metadata refers to
// itself so we can check both cases with the same routine.
- MDNode *loopIdMD =
- dyn_cast<MDNode>(II->getMetadata("llvm.mem.parallel_loop_access"));
+ MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access");
+
+ if (!loopIdMD)
+ return false;
+
bool loopIdMDFound = false;
for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) {
if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) {
@@ -337,9 +333,6 @@ bool Loop::isAnnotatedParallel() const {
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool Loop::hasDedicatedExits() const {
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
// Each predecessor of each exit block of a normal loop is contained
// within the loop.
SmallVector<BasicBlock *, 4> ExitBlocks;
@@ -347,7 +340,7 @@ bool Loop::hasDedicatedExits() const {
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
for (pred_iterator PI = pred_begin(ExitBlocks[i]),
PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
- if (!LoopBBs.count(*PI))
+ if (!contains(*PI))
return false;
// All the requirements are met.
return true;
@@ -362,11 +355,6 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
assert(hasDedicatedExits() &&
"getUniqueExitBlocks assumes the loop has canonical form exits!");
- // Sort the blocks vector so that we can use binary search to do quick
- // lookups.
- SmallVector<BasicBlock *, 128> LoopBBs(block_begin(), block_end());
- std::sort(LoopBBs.begin(), LoopBBs.end());
-
SmallVector<BasicBlock *, 32> switchExitBlocks;
for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
@@ -376,7 +364,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
      // If the block is inside the loop then it is not an exit block.
- if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+ if (contains(*I))
continue;
pred_iterator PI = pred_begin(*I);
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 0f0a1c9..1db0f63 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -31,12 +31,13 @@
using namespace llvm;
enum AllocType {
- MallocLike = 1<<0, // allocates
- CallocLike = 1<<1, // allocates + bzero
- ReallocLike = 1<<2, // reallocates
- StrDupLike = 1<<3,
+ OpNewLike = 1<<0, // allocates; never returns null
+ MallocLike = 1<<1 | OpNewLike, // allocates; may return null
+ CallocLike = 1<<2, // allocates + bzero
+ ReallocLike = 1<<3, // reallocates
+ StrDupLike = 1<<4,
AllocLike = MallocLike | CallocLike | StrDupLike,
- AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike
+ AnyAlloc = AllocLike | ReallocLike
};
struct AllocFnsTy {
@@ -52,20 +53,20 @@ struct AllocFnsTy {
static const AllocFnsTy AllocationFnData[] = {
{LibFunc::malloc, MallocLike, 1, 0, -1},
{LibFunc::valloc, MallocLike, 1, 0, -1},
- {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int)
+ {LibFunc::Znwj, OpNewLike, 1, 0, -1}, // new(unsigned int)
{LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow)
- {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long)
+ {LibFunc::Znwm, OpNewLike, 1, 0, -1}, // new(unsigned long)
{LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow)
- {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int)
+ {LibFunc::Znaj, OpNewLike, 1, 0, -1}, // new[](unsigned int)
{LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow)
- {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long)
+ {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long)
{LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow)
- {LibFunc::posix_memalign, MallocLike, 3, 2, -1},
{LibFunc::calloc, CallocLike, 2, 0, 1},
{LibFunc::realloc, ReallocLike, 2, 1, -1},
{LibFunc::reallocf, ReallocLike, 2, 1, -1},
{LibFunc::strdup, StrDupLike, 1, -1, -1},
{LibFunc::strndup, StrDupLike, 2, 1, -1}
+ // TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
@@ -117,7 +118,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
return 0;
const AllocFnsTy *FnData = &AllocationFnData[i];
- if ((FnData->AllocTy & AllocTy) == 0)
+ if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
return 0;
// Check function prototype.
@@ -189,6 +190,13 @@ bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast);
}
+/// \brief Tests if a value is a call or invoke to a library function that
+/// allocates memory and never returns null (such as operator new).
+bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast) {
+ return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast);
+}
+
/// extractMallocCall - Returns the corresponding CallInst if the instruction
/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we
/// ignore InvokeInst here.
@@ -197,7 +205,7 @@ const CallInst *llvm::extractMallocCall(const Value *I,
return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0;
}
-static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
+static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
@@ -205,12 +213,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
- if (!T || !T->isSized() || !TD)
+ if (!T || !T->isSized() || !DL)
return 0;
- unsigned ElementSize = TD->getTypeAllocSize(T);
+ unsigned ElementSize = DL->getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
- ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+ ElementSize = DL->getStructLayout(ST)->getSizeInBytes();
// If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
@@ -227,10 +235,10 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD,
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
const CallInst *llvm::isArrayMalloc(const Value *I,
- const DataLayout *TD,
+ const DataLayout *DL,
const TargetLibraryInfo *TLI) {
const CallInst *CI = extractMallocCall(I, TLI);
- Value *ArraySize = computeArraySize(CI, TD, TLI);
+ Value *ArraySize = computeArraySize(CI, DL, TLI);
if (ConstantInt *ConstSize = dyn_cast_or_null<ConstantInt>(ArraySize))
if (ConstSize->isOne())
@@ -288,11 +296,11 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI,
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *TD,
+Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt) {
assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call");
- return computeArraySize(CI, TD, TLI, LookThroughSExt);
+ return computeArraySize(CI, DL, TLI, LookThroughSExt);
}
@@ -354,12 +362,12 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of allocas,
/// byval arguments, and global variables.
-bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD,
+bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL,
const TargetLibraryInfo *TLI, bool RoundToAlign) {
- if (!TD)
+ if (!DL)
return false;
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
return false;
@@ -386,12 +394,12 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) {
return Size;
}
-ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD,
+ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
bool RoundToAlign)
-: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) {
- IntegerType *IntTy = TD->getIntPtrType(Context);
+: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) {
+ IntegerType *IntTy = DL->getIntPtrType(Context);
IntTyBits = IntTy->getBitWidth();
Zero = APInt::getNullValue(IntTyBits);
}
@@ -434,7 +442,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
- APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlignment()), Zero);
@@ -453,7 +461,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
return unknown();
}
PointerType *PT = cast<PointerType>(A.getType());
- APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType()));
return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
@@ -526,7 +534,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetType PtrData = compute(GEP.getPointerOperand());
APInt Offset(IntTyBits, 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset))
+ if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset))
return unknown();
return std::make_pair(PtrData.first, PtrData.second + Offset);
@@ -542,7 +550,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
- APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType()));
+ APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType()));
return std::make_pair(align(Size, GV.getAlignment()), Zero);
}
@@ -578,12 +586,13 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
return unknown();
}
-
-ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD,
- const TargetLibraryInfo *TLI,
- LLVMContext &Context)
-: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) {
- IntTy = TD->getIntPtrType(Context);
+ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL,
+ const TargetLibraryInfo *TLI,
+ LLVMContext &Context,
+ bool RoundToAlign)
+: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)),
+ RoundToAlign(RoundToAlign) {
+ IntTy = DL->getIntPtrType(Context);
Zero = ConstantInt::get(IntTy, 0);
}
@@ -607,7 +616,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
- ObjectSizeOffsetVisitor Visitor(TD, TLI, Context);
+ ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
@@ -626,13 +635,15 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I);
- // record the pointers that were handled in this run, so that they can be
- // cleaned later if something fails
- SeenVals.insert(V);
-
// now compute the size and offset
SizeOffsetEvalType Result;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+
+ // Record the pointers that were handled in this run, so that they can be
+ // cleaned later if something fails. We also use this set to break cycles that
+ // can occur in dead code.
+ if (!SeenVals.insert(V)) {
+ Result = unknown();
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
Result = visitGEPOperator(*GEP);
} else if (Instruction *I = dyn_cast<Instruction>(V)) {
Result = visit(*I);
@@ -665,7 +676,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
assert(I.isArrayAllocation());
Value *ArraySize = I.getArraySize();
Value *Size = ConstantInt::get(ArraySize->getType(),
- TD->getTypeAllocSize(I.getAllocatedType()));
+ DL->getTypeAllocSize(I.getAllocatedType()));
Size = Builder.CreateMul(Size, ArraySize);
return std::make_pair(Size, Zero);
}
@@ -717,7 +728,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
if (!bothKnown(PtrData))
return unknown();
- Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP, /*NoAssumptions=*/true);
+ Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true);
Offset = Builder.CreateAdd(PtrData.second, Offset);
return std::make_pair(PtrData.first, Offset);
}
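One subtlety in the AllocType change above: because MallocLike now contains the OpNewLike bit, the matching test in getAllocationData had to become a subset check, (FnData->AllocTy & AllocTy) != FnData->AllocTy, instead of the old any-intersection test. An illustrative restatement (not patch code):

    // An AllocationFnData entry matches a query iff every bit the entry
    // carries is requested by the query.
    static bool entryMatches(unsigned EntryTy, unsigned QueryTy) {
      return (EntryTy & QueryTy) == EntryTy;
    }
    // entryMatches(OpNewLike, MallocLike)  -> true : operator new still counts
    //   as malloc-like, since MallocLike = 1<<1 | OpNewLike covers its bit.
    // entryMatches(MallocLike, OpNewLike)  -> false: plain malloc may return
    //   null, so isOperatorNewLikeFn() correctly rejects it.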
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index fe1c874..84ff2ee 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -371,18 +371,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ Instruction *Inst = --ScanIt;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ // Debug intrinsics don't (and can't) cause dependencies.
+ if (isa<DbgInfoIntrinsic>(II)) continue;
+
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
--Limit;
if (!Limit)
return MemDepResult::getUnknown();
- Instruction *Inst = --ScanIt;
-
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- // Debug intrinsics don't (and can't) cause dependences.
- if (isa<DbgInfoIntrinsic>(II)) continue;
-
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
deleted file mode 100644
index 30d213b..0000000
--- a/lib/Analysis/PathNumbering.cpp
+++ /dev/null
@@ -1,521 +0,0 @@
-//===- PathNumbering.cpp --------------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Ball-Larus path numbers uniquely identify paths through a directed acyclic
-// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony
-// edges to obtain a DAG, and thus the unique path numbers [Ball96].
-//
-// The purpose of this analysis is to enumerate the edges in a CFG in order
-// to obtain paths from path numbers in a convenient manner. As described in
-// [Ball96] edges can be enumerated such that given a path number by following
-// the CFG and updating the path number, the path is obtained.
-//
-// [Ball96]
-// T. Ball and J. R. Larus. "Efficient Path Profiling."
-// International Symposium on Microarchitecture, pages 46-57, 1996.
-// http://portal.acm.org/citation.cfm?id=243857
-//
-//===----------------------------------------------------------------------===//
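
As a worked illustration of the scheme this file implements (the example is not part of the original source), take a diamond CFG entry -> {then, else} -> exit. A self-contained sketch that assigns edge increments in reverse topological order:

  #include <cstdio>

  // Diamond DAG: 0=entry, 1=then, 2=else, 3=exit. numPaths(exit) = 1; each
  // outgoing edge gets the sum of numPaths of the successors listed before it.
  int main() {
    int succ[4][2] = {{1, 2}, {3, -1}, {3, -1}, {-1, -1}};
    int numPaths[4] = {0, 0, 0, 1};            // exit is the base case
    for (int n = 2; n >= 0; --n) {             // 2, 1, 0 is reverse topological
      int sum = 0;
      for (int i = 0; i < 2 && succ[n][i] >= 0; ++i) {
        printf("weight(%d->%d) = %d\n", n, succ[n][i], sum);
        sum += numPaths[succ[n][i]];
      }
      numPaths[n] = sum;
    }
    printf("paths: %d\n", numPaths[0]);        // 2 paths, numbered 0 and 1
    return 0;
  }

Summing the weights along any entry-to-exit path yields that path's unique number in [0, numPaths(entry)): entry->then->exit sums to 0, entry->else->exit to 1.
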
-#define DEBUG_TYPE "ball-larus-numbering"
-
-#include "llvm/Analysis/PathNumbering.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeBuilder.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <queue>
-#include <sstream>
-#include <stack>
-#include <string>
-#include <utility>
-
-using namespace llvm;
-
-// Are we enabling early termination?
-static cl::opt<bool> ProcessEarlyTermination(
- "path-profile-early-termination", cl::Hidden,
- cl::desc("In path profiling, insert extra instrumentation to account for "
- "unexpected function termination."));
-
-// Returns the basic block for the BallLarusNode
-BasicBlock* BallLarusNode::getBlock() {
- return(_basicBlock);
-}
-
-// Returns the number of paths to the exit starting at the node.
-unsigned BallLarusNode::getNumberPaths() {
- return(_numberPaths);
-}
-
-// Sets the number of paths to the exit starting at the node.
-void BallLarusNode::setNumberPaths(unsigned numberPaths) {
- _numberPaths = numberPaths;
-}
-
-// Gets the NodeColor used in graph algorithms.
-BallLarusNode::NodeColor BallLarusNode::getColor() {
- return(_color);
-}
-
-// Sets the NodeColor used in graph algorithms.
-void BallLarusNode::setColor(BallLarusNode::NodeColor color) {
- _color = color;
-}
-
-// Returns an iterator over predecessor edges. Includes phony and
-// backedges.
-BLEdgeIterator BallLarusNode::predBegin() {
- return(_predEdges.begin());
-}
-
-// Returns the end sentinel for the predecessor iterator.
-BLEdgeIterator BallLarusNode::predEnd() {
- return(_predEdges.end());
-}
-
-// Returns the number of predecessor edges. Includes phony and
-// backedges.
-unsigned BallLarusNode::getNumberPredEdges() {
- return(_predEdges.size());
-}
-
-// Returns an iterator over successor edges. Includes phony and
-// backedges.
-BLEdgeIterator BallLarusNode::succBegin() {
- return(_succEdges.begin());
-}
-
-// Returns the end sentinel for the successor iterator.
-BLEdgeIterator BallLarusNode::succEnd() {
- return(_succEdges.end());
-}
-
-// Returns the number of successor edges. Includes phony and
-// backedges.
-unsigned BallLarusNode::getNumberSuccEdges() {
- return(_succEdges.size());
-}
-
-// Add an edge to the predecessor list.
-void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
- _predEdges.push_back(edge);
-}
-
-// Remove an edge from the predecessor list.
-void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
- removeEdge(_predEdges, edge);
-}
-
-// Add an edge to the successor list.
-void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
- _succEdges.push_back(edge);
-}
-
-// Remove an edge from the successor list.
-void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
- removeEdge(_succEdges, edge);
-}
-
-// Returns the name of the BasicBlock being represented. If BasicBlock
-// is null then returns "<null>". If BasicBlock has no name, then
-// "<unnamed>" is returned. Intended for use with debug output.
-std::string BallLarusNode::getName() {
- std::stringstream name;
-
- if(getBlock() != NULL) {
- if(getBlock()->hasName()) {
- std::string tempName(getBlock()->getName());
- name << tempName.c_str() << " (" << _uid << ")";
- } else
- name << "<unnamed> (" << _uid << ")";
- } else
- name << "<null> (" << _uid << ")";
-
- return name.str();
-}
-
-// Removes an edge from an edgeVector. Used by removePredEdge and
-// removeSuccEdge.
-void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
- // TODO: Avoid linear scan by using a set instead
- for(BLEdgeIterator i = v.begin(),
- end = v.end();
- i != end;
- ++i) {
- if((*i) == e) {
- v.erase(i);
- break;
- }
- }
-}
-
-// Returns the source node of this edge.
-BallLarusNode* BallLarusEdge::getSource() const {
- return(_source);
-}
-
-// Returns the target node of this edge.
-BallLarusNode* BallLarusEdge::getTarget() const {
- return(_target);
-}
-
-// Gets the type of the edge.
-BallLarusEdge::EdgeType BallLarusEdge::getType() const {
- return _edgeType;
-}
-
-// Sets the type of the edge.
-void BallLarusEdge::setType(EdgeType type) {
- _edgeType = type;
-}
-
-// Returns the weight of this edge. Used to decode path numbers to sequences
-// of basic blocks.
-unsigned BallLarusEdge::getWeight() {
- return(_weight);
-}
-
-// Sets the weight of the edge. Used during path numbering.
-void BallLarusEdge::setWeight(unsigned weight) {
- _weight = weight;
-}
-
-// Gets the phony edge originating at the root.
-BallLarusEdge* BallLarusEdge::getPhonyRoot() {
- return _phonyRoot;
-}
-
-// Sets the phony edge originating at the root.
-void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
- _phonyRoot = phonyRoot;
-}
-
-// Gets the phony edge terminating at the exit.
-BallLarusEdge* BallLarusEdge::getPhonyExit() {
- return _phonyExit;
-}
-
-// Sets the phony edge terminating at the exit.
-void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
- _phonyExit = phonyExit;
-}
-
-// Gets the associated real edge if this is a phony edge.
-BallLarusEdge* BallLarusEdge::getRealEdge() {
- return _realEdge;
-}
-
-// Sets the associated real edge if this is a phony edge.
-void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
- _realEdge = realEdge;
-}
-
-// Returns the duplicate number of the edge.
-unsigned BallLarusEdge::getDuplicateNumber() {
- return(_duplicateNumber);
-}
-
-// Initialization that requires virtual functions which are not fully
-// functional in the constructor.
-void BallLarusDag::init() {
- BLBlockNodeMap inDag;
- std::stack<BallLarusNode*> dfsStack;
-
- _root = addNode(&(_function.getEntryBlock()));
- _exit = addNode(NULL);
-
- // start search from root
- dfsStack.push(getRoot());
-
- // dfs to add each bb into the dag
- while(dfsStack.size())
- buildNode(inDag, dfsStack);
-
- // put in the final edge
- addEdge(getExit(),getRoot(),0);
-}
-
-// Frees all memory associated with the DAG.
-BallLarusDag::~BallLarusDag() {
- for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
- ++edge)
- delete (*edge);
-
- for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
- ++node)
- delete (*node);
-}
-
-// Calculate the path numbers by assigning edge increments as prescribed
-// in Ball-Larus path profiling.
-void BallLarusDag::calculatePathNumbers() {
- BallLarusNode* node;
- std::queue<BallLarusNode*> bfsQueue;
- bfsQueue.push(getExit());
-
- while(bfsQueue.size() > 0) {
- node = bfsQueue.front();
-
- DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
-
- bfsQueue.pop();
- unsigned prevPathNumber = node->getNumberPaths();
- calculatePathNumbersFrom(node);
-
- // Check for DAG splitting
- if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
- // Add new phony edge from the split-node to the DAG's exit
- BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
- exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
-
- // Counters to handle the possibility of a multi-graph
- BasicBlock* oldTarget = 0;
- unsigned duplicateNumber = 0;
-
- // Iterate through each successor edge, adding phony edges
- for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
- succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
-
- if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
- // is this edge a duplicate?
- if( oldTarget != (*succ)->getTarget()->getBlock() )
- duplicateNumber = 0;
-
- // create the new phony edge: root -> succ
- BallLarusEdge* rootEdge =
- addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
- rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
- rootEdge->setRealEdge(*succ);
-
-        // split on this edge and reference its exit/root phony edges
- (*succ)->setType(BallLarusEdge::SPLITEDGE);
- (*succ)->setPhonyRoot(rootEdge);
- (*succ)->setPhonyExit(exitEdge);
- (*succ)->setWeight(0);
- }
- }
-
- calculatePathNumbersFrom(node);
- }
-
- DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
- << node->getNumberPaths() << ".\n");
-
- if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
- DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
- for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
- pred != end; pred++) {
- if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
- (*pred)->getType() == BallLarusEdge::SPLITEDGE )
- continue;
-
- BallLarusNode* nextNode = (*pred)->getSource();
- // not yet visited?
- if(nextNode->getNumberPaths() == 0)
- bfsQueue.push(nextNode);
- }
- }
- }
-
- DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
-}
-
-// Returns the number of paths for the Dag.
-unsigned BallLarusDag::getNumberOfPaths() {
- return(getRoot()->getNumberPaths());
-}
-
-// Returns the root (i.e. entry) node for the DAG.
-BallLarusNode* BallLarusDag::getRoot() {
- return _root;
-}
-
-// Returns the exit node for the DAG.
-BallLarusNode* BallLarusDag::getExit() {
- return _exit;
-}
-
-// Returns the function for the DAG.
-Function& BallLarusDag::getFunction() {
- return(_function);
-}
-
-// Clears the node colors.
-void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
- for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
- (*nodeIt)->setColor(color);
-}
-
-// Processes one node and its immediate edges for building the DAG.
-void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
- BallLarusNode* currentNode = dfsStack.top();
- BasicBlock* currentBlock = currentNode->getBlock();
-
- if(currentNode->getColor() != BallLarusNode::WHITE) {
- // we have already visited this node
- dfsStack.pop();
- currentNode->setColor(BallLarusNode::BLACK);
- } else {
- // are there any external procedure calls?
- if( ProcessEarlyTermination ) {
- for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
- bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
- bbCurrent++ ) {
- Instruction& instr = *bbCurrent;
- if( instr.getOpcode() == Instruction::Call ) {
- BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
- callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
- break;
- }
- }
- }
-
- TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
- if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) ||
- isa<ResumeInst>(terminator))
- addEdge(currentNode, getExit(),0);
-
- currentNode->setColor(BallLarusNode::GRAY);
- inDag[currentBlock] = currentNode;
-
- BasicBlock* oldSuccessor = 0;
- unsigned duplicateNumber = 0;
-
- // iterate through this node's successors
- for(succ_iterator successor = succ_begin(currentBlock),
- succEnd = succ_end(currentBlock); successor != succEnd;
- oldSuccessor = *successor, ++successor ) {
- BasicBlock* succBB = *successor;
-
- // is this edge a duplicate?
- if (oldSuccessor == succBB)
- duplicateNumber++;
- else
- duplicateNumber = 0;
-
- buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
- }
- }
-}
-
-// Process an edge in the CFG for DAG building.
-void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
- dfsStack, BallLarusNode* currentNode,
- BasicBlock* succBB, unsigned duplicateCount) {
- BallLarusNode* succNode = inDag[succBB];
-
- if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
- // visited node and forward edge
- addEdge(currentNode, succNode, duplicateCount);
- } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
- // visited node and back edge
- DEBUG(dbgs() << "Backedge detected.\n");
- addBackedge(currentNode, succNode, duplicateCount);
- } else {
- BallLarusNode* childNode;
- // not visited node and forward edge
-    if(succNode) // an unvisited node that is a child of a gray node
-      childNode = succNode;
-    else { // an unvisited node that is a child of an unvisited node
- childNode = addNode(succBB);
- inDag[succBB] = childNode;
- }
- addEdge(currentNode, childNode, duplicateCount);
- dfsStack.push(childNode);
- }
-}
-
-// The weight on each edge is the increment required along any path that
-// contains that edge.
-void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) {
- if(node == getExit())
-    // The Exit node must be the base case
- node->setNumberPaths(1);
- else {
- unsigned sumPaths = 0;
- BallLarusNode* succNode;
-
- for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
- succ != end; succ++) {
- if( (*succ)->getType() == BallLarusEdge::BACKEDGE ||
- (*succ)->getType() == BallLarusEdge::SPLITEDGE )
- continue;
-
- (*succ)->setWeight(sumPaths);
- succNode = (*succ)->getTarget();
-
- if( !succNode->getNumberPaths() )
- return;
- sumPaths += succNode->getNumberPaths();
- }
-
- node->setNumberPaths(sumPaths);
- }
-}
-
-// Allows subclasses to determine which type of Node is created.
-// Override this method to produce subclasses of BallLarusNode if
-// necessary. The destructor of BallLarusDag will call free on each
-// pointer created.
-BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) {
- return( new BallLarusNode(BB) );
-}
-
-// Allows subclasses to determine which type of Edge is created.
-// Override this method to produce subclasses of BallLarusEdge if
-// necessary. The destructor of BallLarusDag will call free on each
-// pointer created.
-BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source,
- BallLarusNode* target,
- unsigned duplicateCount) {
- return( new BallLarusEdge(source, target, duplicateCount) );
-}
-
-// Proxy to node's constructor. Updates the DAG state.
-BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) {
- BallLarusNode* newNode = createNode(BB);
- _nodes.push_back(newNode);
- return( newNode );
-}
-
-// Proxy to edge's constructor. Updates the DAG state.
-BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source,
- BallLarusNode* target,
- unsigned duplicateCount) {
- BallLarusEdge* newEdge = createEdge(source, target, duplicateCount);
- _edges.push_back(newEdge);
- source->addSuccEdge(newEdge);
- target->addPredEdge(newEdge);
- return(newEdge);
-}
-
-// Adds a backedge with its phony edges. Updates the DAG state.
-void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target,
- unsigned duplicateCount) {
- BallLarusEdge* childEdge = addEdge(source, target, duplicateCount);
- childEdge->setType(BallLarusEdge::BACKEDGE);
-
- childEdge->setPhonyRoot(addEdge(getRoot(), target,0));
- childEdge->setPhonyExit(addEdge(source, getExit(),0));
-
- childEdge->getPhonyRoot()->setRealEdge(childEdge);
- childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY);
-
- childEdge->getPhonyExit()->setRealEdge(childEdge);
- childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY);
- _backEdges.push_back(childEdge);
-}
diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp
deleted file mode 100644
index bc53221..0000000
--- a/lib/Analysis/PathProfileInfo.cpp
+++ /dev/null
@@ -1,433 +0,0 @@
-//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interface used by optimizers to load path profiles,
-// and provides a loader pass which reads a path profile file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-info"
-
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-
-using namespace llvm;
-
-// command line option for loading path profiles
-static cl::opt<std::string>
-PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
-
-namespace {
- class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
- public:
- PathProfileLoaderPass() : ModulePass(ID) { }
- ~PathProfileLoaderPass();
-
- // this pass doesn't change anything (only loads information)
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- // the full name of the loader pass
- virtual const char* getPassName() const {
- return "Path Profiling Information Loader";
- }
-
- // required since this pass implements multiple inheritance
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &PathProfileInfo::ID)
- return (PathProfileInfo*)this;
- return this;
- }
-
- // entry point to run the pass
- bool runOnModule(Module &M);
-
- // pass identification
- static char ID;
-
- private:
- // make a reference table to refer to function by number
- void buildFunctionRefs(Module &M);
-
- // process argument info of a program from the input file
- void handleArgumentInfo();
-
- // process path number information from the input file
- void handlePathInfo();
-
- // array of references to the functions in the module
- std::vector<Function*> _functions;
-
- // path profile file handle
- FILE* _file;
-
- // path profile file name
- std::string _filename;
- };
-}
-
-// register PathLoader
-char PathProfileLoaderPass::ID = 0;
-
-INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
- NoPathProfileInfo)
-INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
- "path-profile-loader",
- "Load path profile information from file",
- false, true, false)
-
-char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
-
-// link PathLoader as a pass, and make it available to the optimizers
-ModulePass *llvm::createPathProfileLoaderPass() {
- return new PathProfileLoaderPass;
-}
-
-// ----------------------------------------------------------------------------
-// PathEdge implementation
-//
-ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
- unsigned duplicateNumber)
- : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
-
-// ----------------------------------------------------------------------------
-// Path implementation
-//
-
-ProfilePath::ProfilePath (unsigned int number, unsigned int count,
- double countStdDev, PathProfileInfo* ppi)
- : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
-
-double ProfilePath::getFrequency() const {
- return 100 * double(_count) /
- double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
-}
-
-static BallLarusEdge* getNextEdge (BallLarusNode* node,
- unsigned int pathNumber) {
- BallLarusEdge* best = 0;
-
- for( BLEdgeIterator next = node->succBegin(),
- end = node->succEnd(); next != end; next++ ) {
- if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
- (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
- (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
- (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
- best = *next;
- }
-
- return best;
-}
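
getNextEdge is the standard Ball-Larus decode step: among the usable successor edges, take the one with the largest weight not exceeding the remaining path number. A hedged sketch of the loop it supports, with hypothetical Node/Edge stand-ins for the classes above:

  std::vector<Edge*> decode(Node *Root, Node *Exit, unsigned PathNumber) {
    std::vector<Edge*> Path;
    for (Node *N = Root; N != Exit;) {
      Edge *E = getNextEdge(N, PathNumber); // largest weight <= PathNumber
      PathNumber -= E->Weight;              // consume the increment
      Path.push_back(E);
      N = E->Target;
    }
    return Path;
  }

For the diamond example in PathNumbering.cpp, number 1 first takes the weight-1 edge entry->else (leaving 0), then the weight-0 edge else->exit.
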
-
-ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
- BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
- unsigned int increment = _number;
- ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
-
- while (currentNode != _ppi->_currentDag->getExit()) {
- BallLarusEdge* next = getNextEdge(currentNode, increment);
-
- increment -= next->getWeight();
-
- if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
- next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
- next->getTarget() != _ppi->_currentDag->getExit() )
- pev->push_back(ProfilePathEdge(
- next->getSource()->getBlock(),
- next->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
- next->getTarget() == _ppi->_currentDag->getExit() )
- pev->push_back(ProfilePathEdge(
- next->getRealEdge()->getSource()->getBlock(),
- next->getRealEdge()->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
- next->getSource() == _ppi->_currentDag->getRoot() )
- pev->push_back(ProfilePathEdge(
- next->getRealEdge()->getSource()->getBlock(),
- next->getRealEdge()->getTarget()->getBlock(),
- next->getDuplicateNumber()));
-
- // set the new node
- currentNode = next->getTarget();
- }
-
- return pev;
-}
-
-ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
- BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
- unsigned int increment = _number;
- ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
-
- while (currentNode != _ppi->_currentDag->getExit()) {
- BallLarusEdge* next = getNextEdge(currentNode, increment);
- increment -= next->getWeight();
-
- // add block to the block list if it is a real edge
- if( next->getType() == BallLarusEdge::NORMAL)
- pbv->push_back (currentNode->getBlock());
- // make the back edge the last edge since we are at the end
- else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
- pbv->push_back (currentNode->getBlock());
- pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
- }
-
- // set the new node
- currentNode = next->getTarget();
- }
-
- return pbv;
-}
-
-BasicBlock* ProfilePath::getFirstBlockInPath() const {
- BallLarusNode* root = _ppi->_currentDag->getRoot();
- BallLarusEdge* edge = getNextEdge(root, _number);
-
- if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
- edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
- return edge->getTarget()->getBlock();
-
- return root->getBlock();
-}
-
-// ----------------------------------------------------------------------------
-// PathProfileInfo implementation
-//
-
-// Pass identification
-char llvm::PathProfileInfo::ID = 0;
-
-PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
-}
-
-PathProfileInfo::~PathProfileInfo() {
- if (_currentDag)
- delete _currentDag;
-}
-
-// set the function for which paths are currently being processed
-void PathProfileInfo::setCurrentFunction(Function* F) {
- // Make sure it exists
- if (!F) return;
-
- if (_currentDag)
- delete _currentDag;
-
- _currentFunction = F;
- _currentDag = new BallLarusDag(*F);
- _currentDag->init();
- _currentDag->calculatePathNumbers();
-}
-
-// get the function for which paths are currently being processed
-Function* PathProfileInfo::getCurrentFunction() const {
- return _currentFunction;
-}
-
-// get the entry block of the function
-BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
- return _currentDag->getRoot()->getBlock();
-}
-
-// return the path based on its number
-ProfilePath* PathProfileInfo::getPath(unsigned int number) {
- return _functionPaths[_currentFunction][number];
-}
-
-// return the number of paths which a function may potentially execute
-unsigned int PathProfileInfo::getPotentialPathCount() {
- return _currentDag ? _currentDag->getNumberOfPaths() : 0;
-}
-
-// return an iterator for the beginning of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathBegin() {
- return _functionPaths[_currentFunction].begin();
-}
-
-// return an iterator for the end of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathEnd() {
- return _functionPaths[_currentFunction].end();
-}
-
-// returns the total number of paths run in the function
-unsigned int PathProfileInfo::pathsRun() {
- return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
-}
-
-// ----------------------------------------------------------------------------
-// PathLoader implementation
-//
-
-// remove all generated paths
-PathProfileLoaderPass::~PathProfileLoaderPass() {
- for( FunctionPathIterator funcNext = _functionPaths.begin(),
- funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
- for( ProfilePathIterator pathNext = funcNext->second.begin(),
- pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
- delete pathNext->second;
-}
-
-// entry point of the pass; this loads and parses a file
-bool PathProfileLoaderPass::runOnModule(Module &M) {
- // get the filename and setup the module's function references
- _filename = PathProfileInfoFilename;
- buildFunctionRefs (M);
-
- if (!(_file = fopen(_filename.c_str(), "rb"))) {
- errs () << "error: input '" << _filename << "' file does not exist.\n";
- return false;
- }
-
- ProfilingType profType;
-
- while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
- switch (profType) {
- case ArgumentInfo:
- handleArgumentInfo ();
- break;
- case PathInfo:
- handlePathInfo ();
- break;
- default:
- errs () << "error: bad path profiling file syntax, " << profType << "\n";
- fclose (_file);
- return false;
- }
- }
-
- fclose (_file);
-
- return true;
-}
-
-// create a reference table for functions defined in the path profile file
-void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
- _functions.push_back(0); // make the 0 index a null pointer
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
- if (F->isDeclaration())
- continue;
- _functions.push_back(F);
- }
-}
-
-// handle command-line argument info in the output file
-void PathProfileLoaderPass::handleArgumentInfo() {
- // get the argument list's length
- unsigned savedArgsLength;
- if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
- errs() << "warning: argument info header/data mismatch\n";
- return;
- }
-
- // allocate a buffer, and get the arguments
- char* args = new char[savedArgsLength+1];
- if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
- errs() << "warning: argument info header/data mismatch\n";
-
- args[savedArgsLength] = '\0';
- argList = std::string(args);
- delete [] args; // cleanup dynamic string
-
- // byte alignment
- if (savedArgsLength & 3)
- fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
-}
-
-// Handle path profile information in the output file
-void PathProfileLoaderPass::handlePathInfo () {
- // get the number of functions in this profile
- unsigned functionCount;
- if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
- errs() << "warning: path info header/data mismatch\n";
- return;
- }
-
- // gather path information for each function
- for (unsigned i = 0; i < functionCount; i++) {
- PathProfileHeader pathHeader;
- if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
- errs() << "warning: bad header for path function info\n";
- break;
- }
-
- Function* f = _functions[pathHeader.fnNumber];
-
- // dynamically allocate a table to store path numbers
- PathProfileTableEntry* pathTable =
- new PathProfileTableEntry[pathHeader.numEntries];
-
- if( fread(pathTable, sizeof(PathProfileTableEntry),
- pathHeader.numEntries, _file) != pathHeader.numEntries) {
- delete [] pathTable;
- errs() << "warning: path function info header/data mismatch\n";
- return;
- }
-
- // Build a new path for the current function
- unsigned int totalPaths = 0;
- for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
- totalPaths += pathTable[j].pathCounter;
- _functionPaths[f][pathTable[j].pathNumber]
- = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
- 0, this);
- }
-
- _functionPathCounts[f] = totalPaths;
-
- delete [] pathTable;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// NoProfile PathProfileInfo implementation
-//
-
-namespace {
- struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
- static char ID; // Class identification, replacement for typeinfo
- NoPathProfileInfo() : ImmutablePass(ID) {
- initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &PathProfileInfo::ID)
- return (PathProfileInfo*)this;
- return this;
- }
-
- virtual const char *getPassName() const {
- return "NoPathProfileInfo";
- }
- };
-} // End of anonymous namespace
-
-char NoPathProfileInfo::ID = 0;
-// Register this pass...
-INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
- "No Path Profile Information", false, true, true)
-
-ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
deleted file mode 100644
index 48d7d05..0000000
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This verifier derives an edge profile file from the current path profile
-// information.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-verifier"
-
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <stdio.h>
-
-using namespace llvm;
-
-namespace {
- class PathProfileVerifier : public ModulePass {
- private:
- bool runOnModule(Module &M);
-
- public:
- static char ID; // Pass identification, replacement for typeid
- PathProfileVerifier() : ModulePass(ID) {
- initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
- }
-
-
- virtual const char *getPassName() const {
- return "Path Profiler Verifier";
- }
-
- // The verifier requires the path profile and edge profile.
- virtual void getAnalysisUsage(AnalysisUsage& AU) const;
- };
-}
-
-static cl::opt<std::string>
-EdgeProfileFilename("path-profile-verifier-file",
- cl::init("edgefrompath.llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Edge profile file generated by -path-profile-verifier"),
- cl::Hidden);
-
-char PathProfileVerifier::ID = 0;
-INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
- "Compare the path profile derived edge profile against the "
- "edge profile.", true, true)
-
-ModulePass *llvm::createPathProfileVerifierPass() {
- return new PathProfileVerifier();
-}
-
-// The verifier requires the path profile and edge profile.
-void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
- AU.addRequired<PathProfileInfo>();
- AU.addPreserved<PathProfileInfo>();
-}
-
-typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
-typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
-typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
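
These nested maps index a flat counter array by (source, target, duplicate number); the duplicate dimension distinguishes multi-graph edges, i.e. a terminator listing the same successor more than once. A runnable sketch of the same indexing with ints standing in for BasicBlock*:

  #include <cstdio>
  #include <map>

  int main() {
    std::map<int, std::map<int, std::map<int, unsigned> > > arrayMap;
    unsigned i = 0;
    int succs[3] = {7, 7, 8};               // successor 7 listed twice
    int prev = -1;
    unsigned duplicate = 0;
    for (int s = 0; s < 3; prev = succs[s], ++s) {
      duplicate = (prev == succs[s]) ? duplicate + 1 : 0;
      arrayMap[1][succs[s]][duplicate] = i++;
    }
    // Prints: 1->7#0 at 0, 1->7#1 at 1, 1->8#0 at 2
    printf("1->7#0 at %u, 1->7#1 at %u, 1->8#0 at %u\n",
           arrayMap[1][7][0], arrayMap[1][7][1], arrayMap[1][8][0]);
    return 0;
  }
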
-
-// The verifier iterates through each path to accumulate the
-// total edge frequencies.
-bool PathProfileVerifier::runOnModule (Module &M) {
- PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
-
- // setup a data structure to map path edges which index an
- // array of edge counters
- NestedBlockToIndexMap arrayMap;
- unsigned i = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
-
- arrayMap[(BasicBlock*)0][F->begin()][0] = i++;
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
-
- unsigned duplicate = 0;
- BasicBlock* prev = 0;
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
- prev = TI->getSuccessor(s), ++s) {
- if (prev == TI->getSuccessor(s))
- duplicate++;
- else duplicate = 0;
-
- arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
- }
- }
- }
-
- std::vector<unsigned> edgeArray(i);
-
- // iterate through each path and increment the edge counters as needed
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
-
- pathProfileInfo.setCurrentFunction(F);
-
- DEBUG(dbgs() << "function '" << F->getName() << "' ran "
- << pathProfileInfo.pathsRun()
- << "/" << pathProfileInfo.getPotentialPathCount()
- << " potential paths\n");
-
- for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
- endPath = pathProfileInfo.pathEnd();
- nextPath != endPath; nextPath++ ) {
- ProfilePath* currentPath = nextPath->second;
-
- ProfilePathEdgeVector* pev = currentPath->getPathEdges();
- DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": "
- << currentPath->getCount() << "\n");
- // setup the entry edge (normally path profiling doesn't care about this)
- if (currentPath->getFirstBlockInPath() == &F->getEntryBlock())
- edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]]
- += currentPath->getCount();
-
- for( ProfilePathEdgeIterator nextEdge = pev->begin(),
- endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) {
- if (nextEdge != pev->begin())
- DEBUG(dbgs() << " :: ");
-
- BasicBlock* source = nextEdge->getSource();
- BasicBlock* target = nextEdge->getTarget();
- unsigned duplicateNumber = nextEdge->getDuplicateNumber();
- DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber
- << "}--> " << target->getName());
-
- // Ensure all the referenced edges exist
- // TODO: make this a separate function
- if( !arrayMap.count(source) ) {
- errs() << " error [" << F->getName() << "()]: source '"
- << source->getName()
- << "' does not exist in the array map.\n";
- } else if( !arrayMap[source].count(target) ) {
- errs() << " error [" << F->getName() << "()]: target '"
- << target->getName()
- << "' does not exist in the array map.\n";
- } else if( !arrayMap[source][target].count(duplicateNumber) ) {
- errs() << " error [" << F->getName() << "()]: edge "
- << source->getName() << " -> " << target->getName()
- << " duplicate number " << duplicateNumber
- << " does not exist in the array map.\n";
- } else {
- edgeArray[arrayMap[source][target][duplicateNumber]]
- += currentPath->getCount();
- }
- }
-
- DEBUG(errs() << "\n");
-
- delete pev;
- }
- }
-
- std::string errorInfo;
- std::string filename = EdgeProfileFilename;
-
- // Open a handle to the file
- FILE* edgeFile = fopen(filename.c_str(),"wb");
-
- if (!edgeFile) {
- errs() << "error: unable to open file '" << filename << "' for output.\n";
- return false;
- }
-
- errs() << "Generating edge profile '" << filename << "' ...\n";
-
- // write argument info
- unsigned type = ArgumentInfo;
- unsigned num = pathProfileInfo.argList.size();
- int zeros = 0;
-
- fwrite(&type,sizeof(unsigned),1,edgeFile);
- fwrite(&num,sizeof(unsigned),1,edgeFile);
- fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile);
- if (num&3)
- fwrite(&zeros, 1, 4-(num&3), edgeFile);
-
- type = EdgeInfo;
- num = edgeArray.size();
- fwrite(&type,sizeof(unsigned),1,edgeFile);
- fwrite(&num,sizeof(unsigned),1,edgeFile);
-
- // write each edge to the file
- for( std::vector<unsigned>::iterator s = edgeArray.begin(),
- e = edgeArray.end(); s != e; s++)
- fwrite(&*s, sizeof (unsigned), 1, edgeFile);
-
- fclose (edgeFile);
-
- return true;
-}
diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp
deleted file mode 100644
index 3d0a1e2..0000000
--- a/lib/Analysis/ProfileDataLoader.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- ProfileDataLoader.cpp - Load profile information from disk ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ProfileDataLoader class is used to load raw profiling data from the dump
-// file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Analysis/ProfileDataTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-raw_ostream &llvm::operator<<(raw_ostream &O, std::pair<const BasicBlock *,
- const BasicBlock *> E) {
- O << "(";
-
- if (E.first)
- O << E.first->getName();
- else
- O << "0";
-
- O << ",";
-
- if (E.second)
- O << E.second->getName();
- else
- O << "0";
-
- return O << ")";
-}
-
-/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one
-/// (or both) may not be defined.
-static unsigned AddCounts(unsigned A, unsigned B) {
- // If either value is undefined, use the other.
- // Undefined + undefined = undefined.
- if (A == ProfileDataLoader::Uncounted) return B;
- if (B == ProfileDataLoader::Uncounted) return A;
-
- return A + B;
-}
-
-/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F'
-template <typename T>
-static void ReadProfilingData(const char *ToolName, FILE *F,
- T *Data, size_t NumEntries) {
- // Read in the block of data...
- if (fread(Data, sizeof(T), NumEntries, F) != NumEntries)
- report_fatal_error(Twine(ToolName) + ": Profiling data truncated");
-}
-
-/// ReadProfilingNumEntries - Read how many entries are in this profiling data
-/// packet.
-static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F,
- bool ShouldByteSwap) {
- unsigned Entry;
- ReadProfilingData<unsigned>(ToolName, F, &Entry, 1);
- return ShouldByteSwap ? ByteSwap_32(Entry) : Entry;
-}
-
-/// ReadProfilingBlock - Read the number of entries in the next profiling data
-/// packet and then accumulate the entries into 'Data'.
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- SmallVectorImpl<unsigned> &Data) {
- // Read the number of entries...
- unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
-
- // Read in the data.
- SmallVector<unsigned, 8> TempSpace(NumEntries);
- ReadProfilingData<unsigned>(ToolName, F, TempSpace.data(), NumEntries);
-
- // Make sure we have enough space ...
- if (Data.size() < NumEntries)
- Data.resize(NumEntries, ProfileDataLoader::Uncounted);
-
- // Accumulate the data we just read into the existing data.
- for (unsigned i = 0; i < NumEntries; ++i) {
- unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i];
- Data[i] = AddCounts(Entry, Data[i]);
- }
-}
-
-/// ReadProfilingArgBlock - Read the command line arguments that the program was
-/// run with when the current profiling data packet(s) were generated.
-static void ReadProfilingArgBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- SmallVectorImpl<std::string> &CommandLines) {
- // Read the number of bytes ...
- unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap);
-
- // Read in the arguments (if there are any to read). Round up the length to
- // the nearest 4-byte multiple.
- SmallVector<char, 8> Args(ArgLength+4);
- if (ArgLength)
- ReadProfilingData<char>(ToolName, F, Args.data(), (ArgLength+3) & ~3);
-
- // Store the arguments.
- CommandLines.push_back(std::string(&Args[0], &Args[ArgLength]));
-}
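
The (ArgLength+3) & ~3 expression rounds the read size up to the next multiple of four, matching the 4-byte padding the profile writer emits. A one-line sketch:

  // roundUp4(6) == 8, roundUp4(8) == 8: consume the writer's padding too.
  static unsigned roundUp4(unsigned n) { return (n + 3) & ~3u; }
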
-
-const unsigned ProfileDataLoader::Uncounted = ~0U;
-
-/// ProfileDataLoader ctor - Read the specified profiling data file, reporting
-/// a fatal error if the file is invalid or broken.
-ProfileDataLoader::ProfileDataLoader(const char *ToolName,
- const std::string &Filename)
- : Filename(Filename) {
- FILE *F = fopen(Filename.c_str(), "rb");
- if (F == 0)
- report_fatal_error(Twine(ToolName) + ": Error opening '" +
- Filename + "': ");
-
- // Keep reading packets until we run out of them.
- unsigned PacketType;
- while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
- // If the low eight bits of the packet are zero, we must be dealing with an
- // endianness mismatch. Byteswap all words read from the profiling
- // information. This can happen when the compiler host and target have
- // different endianness.
- bool ShouldByteSwap = (char)PacketType == 0;
- PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType;
-
- switch (PacketType) {
- case ArgumentInfo:
- ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines);
- break;
-
- case EdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
- break;
-
- default:
- report_fatal_error(std::string(ToolName)
- + ": Unknown profiling packet type");
- break;
- }
- }
-
- fclose(F);
-}
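
The low-byte heuristic works because packet types are small enumerators: written natively, the low byte of the word is nonzero, while a cross-endian read moves that nonzero byte to the top. A standalone sketch (the value 2 is only an assumed example of a packet type):

  #include <cstdint>
  #include <cstdio>

  static uint32_t bswap32(uint32_t x) {
    return (x >> 24) | ((x >> 8) & 0xFF00) | ((x << 8) & 0xFF0000) | (x << 24);
  }

  int main() {
    uint32_t PacketType = bswap32(2);            // as read on an opposite-endian host
    bool ShouldByteSwap = (char)PacketType == 0; // low byte zero => mismatch
    if (ShouldByteSwap)
      PacketType = bswap32(PacketType);
    printf("swap=%d type=%u\n", ShouldByteSwap, PacketType); // swap=1 type=2
    return 0;
  }
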
diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp
deleted file mode 100644
index 2ee0093..0000000
--- a/lib/Analysis/ProfileDataLoaderPass.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass loads profiling data from a dump file and sets branch weight
-// metadata.
-//
-// TODO: Replace all "profile-metadata-loader" strings with "profile-loader"
-// once ProfileInfo etc. has been removed.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-metadata-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated.");
-
-static cl::opt<std::string>
-ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Profile file loaded by -profile-metadata-loader"));
-
-namespace {
- /// This pass loads profiling data from a dump file and sets branch weight
- /// metadata.
- class ProfileMetadataLoaderPass : public ModulePass {
- std::string Filename;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit ProfileMetadataLoaderPass(const std::string &filename = "")
- : ModulePass(ID), Filename(filename) {
- initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry());
- if (filename.empty()) Filename = ProfileMetadataFilename;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- virtual const char *getPassName() const {
- return "Profile loader";
- }
-
- virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge,
- ArrayRef<unsigned>);
- virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>);
- virtual void setBranchWeightMetadata(Module&, ProfileData&);
-
- virtual bool runOnModule(Module &M);
- };
-} // End of anonymous namespace
-
-char ProfileMetadataLoaderPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader",
- "Load profile information from llvmprof.out", false, true)
-INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader",
- "Load profile information from llvmprof.out", false, true)
-
-char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID;
-
-/// createProfileMetadataLoaderPass - This function returns a Pass that loads
-/// the profiling information for the module from the specified filename,
-/// making it available to the optimizers.
-ModulePass *llvm::createProfileMetadataLoaderPass() {
- return new ProfileMetadataLoaderPass();
-}
-ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) {
- return new ProfileMetadataLoaderPass(Filename);
-}
-
-/// readEdge - Take the value from a profile counter and assign it to an edge.
-void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount,
- ProfileData &PB, ProfileData::Edge e,
- ArrayRef<unsigned> Counters) {
- if (ReadCount >= Counters.size()) return;
-
- unsigned weight = Counters[ReadCount];
- assert(weight != ProfileDataLoader::Uncounted);
- PB.addEdgeWeight(e, weight);
-
- DEBUG(dbgs() << "-- Read Edge Counter for " << e
- << " (# "<< (ReadCount) << "): "
- << PB.getEdgeWeight(e) << "\n");
-}
-
-/// matchEdges - Link every profile counter with an edge.
-unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB,
- ArrayRef<unsigned> Counters) {
- if (Counters.size() == 0) return 0;
-
- unsigned ReadCount = 0;
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n");
- readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)),
- Counters);
- }
- }
- }
-
- return ReadCount;
-}
-
-/// setBranchWeightMetadata - Translate the counter values associated with each
-/// edge into branch weights for each conditional branch (a branch with 2 or
-/// more desinations).
-void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M,
- ProfileData &PB) {
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n");
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- unsigned NumSuccessors = TI->getNumSuccessors();
-
-      // If there is only one successor then we cannot set a branch
-      // probability, as the target is certain.
- if (NumSuccessors < 2) continue;
-
- // Load the weights of all edges leading from this terminator.
- DEBUG(dbgs() << "-- Terminator with " << NumSuccessors
- << " successors:\n");
- SmallVector<uint32_t, 4> Weights(NumSuccessors);
- for (unsigned s = 0 ; s < NumSuccessors ; ++s) {
- ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s));
- Weights[s] = (uint32_t)PB.getEdgeWeight(edge);
- DEBUG(dbgs() << "---- Edge '" << edge << "' has weight "
- << Weights[s] << "\n");
- }
-
- // Set branch weight metadata. This will set branch probabilities of
- // 100%/0% if that is true of the dynamic execution.
- // BranchProbabilityInfo can account for this when it loads this metadata
-      // (it gives the unexecuted branch a weight of 1 for the purposes of
- // probability calculations).
- MDBuilder MDB(TI->getContext());
- MDNode *Node = MDB.createBranchWeights(Weights);
- TI->setMetadata(LLVMContext::MD_prof, Node);
- NumTermsAnnotated++;
- }
- }
-}
-
-bool ProfileMetadataLoaderPass::runOnModule(Module &M) {
- ProfileDataLoader PDL("profile-data-loader", Filename);
- ProfileData PB;
-
- ArrayRef<unsigned> Counters = PDL.getRawEdgeCounts();
-
- unsigned ReadCount = matchEdges(M, PB, Counters);
-
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
-
- setBranchWeightMetadata(M, PB);
-
- return ReadCount > 0;
-}
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
deleted file mode 100644
index 365b64c..0000000
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ /dev/null
@@ -1,426 +0,0 @@
-//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete ProfileInfo provider that estimates
-// profiling information in a very crude and unimaginative way.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-estimator"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-static cl::opt<double>
-LoopWeight(
- "profile-estimator-loop-weight", cl::init(10),
- cl::value_desc("loop-weight"),
- cl::desc("Number of loop executions used for profile-estimator")
-);
-
-namespace {
- class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
- double ExecCount;
- LoopInfo *LI;
- std::set<BasicBlock*> BBToVisit;
- std::map<Loop*,double> LoopExitWeights;
- std::map<Edge,double> MinimalWeight;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit ProfileEstimatorPass(const double execcount = 0)
- : FunctionPass(ID), ExecCount(execcount) {
- initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
- if (execcount == 0) ExecCount = LoopWeight;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<LoopInfo>();
- }
-
- virtual const char *getPassName() const {
- return "Profiling information estimator";
- }
-
- /// run - Estimate the profile information from the specified file.
- virtual bool runOnFunction(Function &F);
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- virtual void recurseBasicBlock(BasicBlock *BB);
-
- void inline printEdgeWeight(Edge);
- };
-} // End of anonymous namespace
-
-char ProfileEstimatorPass::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
- "Estimate profiling information", false, true, false)
-
-namespace llvm {
- char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
-
- FunctionPass *createProfileEstimatorPass() {
- return new ProfileEstimatorPass();
- }
-
- /// createProfileEstimatorPass - This function returns a Pass that estimates
- /// profiling information using the given loop execution count.
- Pass *createProfileEstimatorPass(const unsigned execcount) {
- return new ProfileEstimatorPass(execcount);
- }
-}
-
-static double ignoreMissing(double w) {
- if (w == ProfileInfo::MissingValue) return 0;
- return w;
-}
-
-static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
- DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
-}
-
-void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
- DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
- << format("%20.20g", getEdgeWeight(E)) << "\n");
-}
-
-// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
-// single block and then recurses into the successors.
-// The algorithm preserves the flow condition, meaning that the sum of the
-// weights of the incoming edges must equal the block weight, which must in
-// turn equal the sum of the weights of the outgoing edges.
-// Since the flow of a block is determined from the current state of the
-// flow, once an edge has a flow assigned this flow is never changed again;
-// otherwise it would be possible to violate the flow condition in another
-// block.
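
Stated as an equation, the flow condition for a block B is sum(in-edges of B) = weight(B) = sum(out-edges of B). A tiny checker sketch with a hypothetical Block type, independent of the pass:

  #include <cassert>
  #include <cmath>
  #include <numeric>
  #include <vector>

  struct Block { double Weight; std::vector<double> In, Out; };

  // Asserts the conservation property the estimator maintains per block.
  void checkFlow(const Block &B) {
    double In = std::accumulate(B.In.begin(), B.In.end(), 0.0);
    double Out = std::accumulate(B.Out.begin(), B.Out.end(), 0.0);
    assert(std::fabs(In - B.Weight) < 1e-9 && std::fabs(Out - B.Weight) < 1e-9);
  }
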
-void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
-
- // Break the recursion if this BasicBlock was already visited.
- if (BBToVisit.find(BB) == BBToVisit.end()) return;
-
- // Read the LoopInfo for this block.
- bool BBisHeader = LI->isLoopHeader(BB);
- Loop* BBLoop = LI->getLoopFor(BB);
-
- // To get the block weight, read all incoming edges.
- double BBWeight = 0;
- std::set<BasicBlock*> ProcessedPreds;
- for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- // If this block was not considered already, add weight.
- Edge edge = getEdge(*bbi,BB);
- double w = getEdgeWeight(edge);
- if (ProcessedPreds.insert(*bbi).second) {
- BBWeight += ignoreMissing(w);
- }
-    // If this block is a loop header and the predecessor is contained in this
-    // loop, the edge is a backedge; continue and do not check whether the
-    // value is valid.
- if (BBisHeader && BBLoop->contains(*bbi)) {
- printEdgeError(edge, "but is backedge, continuing");
- continue;
- }
-    // If the edge's value is missing (and this is neither a loop header nor a
-    // backedge), return; this block is currently not estimatable.
- if (w == MissingValue) {
- printEdgeError(edge, "returning");
- return;
- }
- }
- if (getExecutionCount(BB) != MissingValue) {
- BBWeight = getExecutionCount(BB);
- }
-
- // Fetch all necessary information for current block.
- SmallVector<Edge, 8> ExitEdges;
- SmallVector<Edge, 8> Edges;
- if (BBLoop) {
- BBLoop->getExitEdges(ExitEdges);
- }
-
- // If this is a loop header, consider the following:
- // Exactly the flow that is entering this block, must exit this block too. So
- // do the following:
- // *) get all the exit edges, read the flow that is already leaving this
- // loop, remember the edges that do not have any flow on them right now.
-  // (The edges that already have flow on them are most likely exiting edges of
-  // other loops; do not touch those flows, because the previously calculated
-  // loop headers would not be exact anymore.)
- // *) In case there is not a single exiting edge left, create one at the loop
- // latch to prevent the flow from building up in the loop.
- // *) Take the flow that is not leaving the loop already and distribute it on
- // the remaining exiting edges.
- // (This ensures that all flow that enters the loop also leaves it.)
- // *) Increase the flow into the loop by increasing the weight of this block.
- // There is at least one incoming backedge that will bring us this flow later
- // on. (So that the flow condition in this node is valid again.)
- if (BBisHeader) {
- double incoming = BBWeight;
- // Subtract the flow leaving the loop.
- std::set<Edge> ProcessedExits;
- for (SmallVectorImpl<Edge>::iterator ei = ExitEdges.begin(),
- ee = ExitEdges.end(); ei != ee; ++ei) {
- if (ProcessedExits.insert(*ei).second) {
- double w = getEdgeWeight(*ei);
- if (w == MissingValue) {
- Edges.push_back(*ei);
- // Check if there is a necessary minimal weight, if yes, subtract it
- // from weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- incoming -= MinimalWeight[*ei];
- DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- } else {
- incoming -= w;
- }
- }
- }
- // If no exit edges, create one:
- if (Edges.size() == 0) {
- BasicBlock *Latch = BBLoop->getLoopLatch();
- if (Latch) {
- Edge edge = getEdge(Latch,0);
- EdgeInformation[BB->getParent()][edge] = BBWeight;
- printEdgeWeight(edge);
- edge = getEdge(Latch, BB);
- EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
- printEdgeWeight(edge);
- }
- }
-
-    // Distribute the remaining weight to the exiting edges. To prevent
-    // fractions from building up and provoking precision problems, the weight
-    // to be distributed is split and rounded; the last edge gets a somewhat
-    // bigger value, but we are close enough for an estimation.
- double fraction = floor(incoming/Edges.size());
- for (SmallVectorImpl<Edge>::iterator ei = Edges.begin(), ee = Edges.end();
- ei != ee; ++ei) {
- double w = 0;
- if (ei != (ee-1)) {
- w = fraction;
- incoming -= fraction;
- } else {
- w = incoming;
- }
- EdgeInformation[BB->getParent()][*ei] += w;
- // Read necessary minimal weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
- DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- printEdgeWeight(*ei);
-
-      // Add minimal weight to the paths to all exit edges; this is used to
-      // ensure that enough flow reaches these edges.
- Path p;
- const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
- while (Dest != BB) {
- const BasicBlock *Parent = p.find(Dest)->second;
- Edge e = getEdge(Parent, Dest);
- if (MinimalWeight.find(e) == MinimalWeight.end()) {
- MinimalWeight[e] = 0;
- }
- MinimalWeight[e] += w;
- DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
- Dest = Parent;
- }
- }
- // Increase flow into the loop.
- BBWeight *= (ExecCount+1);
- }
-
- BlockInformation[BB->getParent()][BB] = BBWeight;
-  // Up until now we considered only the loop exiting edges; now we have a
-  // definite block weight and must distribute it onto the outgoing edges.
-  // Since there may already be flow attached to some of the edges, read this
-  // flow first and remember the edges that still have no flow attached.
- Edges.clear();
- std::set<BasicBlock*> ProcessedSuccs;
-
- succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- // Also check for (BB,0) edges that may already contain some flow. (But only
- // in case there are no successors.)
- if (bbi == bbe) {
- Edge edge = getEdge(BB,0);
- EdgeInformation[BB->getParent()][edge] = BBWeight;
- printEdgeWeight(edge);
- }
- for ( ; bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- Edge edge = getEdge(BB,*bbi);
- double w = getEdgeWeight(edge);
- if (w != MissingValue) {
- BBWeight -= getEdgeWeight(edge);
- } else {
- Edges.push_back(edge);
-        // If a minimal weight is necessary, reserve it by subtracting it
-        // from the block weight; it is re-added later on.
- if (MinimalWeight.find(edge) != MinimalWeight.end()) {
- BBWeight -= MinimalWeight[edge];
- DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
- }
- }
- }
- }
-
- double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0;
-  // Finally we know what flow is still not leaving the block; distribute
-  // this flow onto the empty edges.
- for (SmallVectorImpl<Edge>::iterator ei = Edges.begin(), ee = Edges.end();
- ei != ee; ++ei) {
- if (ei != (ee-1)) {
- EdgeInformation[BB->getParent()][*ei] += fraction;
- BBWeight -= fraction;
- } else {
- EdgeInformation[BB->getParent()][*ei] += BBWeight;
- }
-    // Re-add the minimal necessary weight.
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
- EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
- DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
- }
- printEdgeWeight(*ei);
- }
-
-  // This block is now visited; mark it before recursing.
- BBToVisit.erase(BB);
-
- // Recurse into successors.
- for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
-}
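
The rounding scheme used above when distributing flow is easiest to see in
isolation: every edge but the last receives the floored share, and the last
edge absorbs the remainder, so the total is conserved exactly. A minimal
standalone sketch in plain C++ (illustrative names, not part of this file):

#include <cmath>
#include <cstdio>
#include <vector>

// Distribute 'weight' over 'slots' edges: every edge but the last gets the
// floored share; the last edge absorbs the rounding remainder so that the
// total is conserved exactly.
static std::vector<double> distribute(double weight, unsigned slots) {
  std::vector<double> shares;
  double fraction = std::floor(weight / slots);
  for (unsigned i = 0; i + 1 < slots; ++i) {
    shares.push_back(fraction);
    weight -= fraction;
  }
  shares.push_back(weight); // the last edge gets whatever is left
  return shares;
}

int main() {
  // 10 units over 3 edges -> 3, 3, 4 (the sum stays 10).
  for (double s : distribute(10, 3))
    std::printf("%g\n", s);
}
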
-
-bool ProfileEstimatorPass::runOnFunction(Function &F) {
- if (F.isDeclaration()) return false;
-
- // Fetch LoopInfo and clear ProfileInfo for this function.
- LI = &getAnalysis<LoopInfo>();
- FunctionInformation.erase(&F);
- BlockInformation[&F].clear();
- EdgeInformation[&F].clear();
- BBToVisit.clear();
-
- // Mark all blocks as to visit.
- for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
- BBToVisit.insert(bi);
-
- // Clear Minimal Edges.
- MinimalWeight.clear();
-
- DEBUG(dbgs() << "Working on function " << F.getName() << "\n");
-
-  // Since the entry block is the first one and has no predecessors, the edge
-  // (0,entry) is inserted with a starting weight of 2^32.
- BasicBlock *entry = &F.getEntryBlock();
- BlockInformation[&F][entry] = pow(2.0, 32.0);
- Edge edge = getEdge(0,entry);
- EdgeInformation[&F][edge] = BlockInformation[&F][entry];
- printEdgeWeight(edge);
-
-  // Since recurseBasicBlock() may return with blocks that were not fully
-  // estimated, keep calling recurseBasicBlock() until everything is
-  // calculated.
- bool cleanup = false;
- recurseBasicBlock(entry);
- while (BBToVisit.size() > 0 && !cleanup) {
-    // Remember the number of open blocks; this is used later to check
-    // whether progress was made.
- unsigned size = BBToVisit.size();
-
- // Try to calculate all blocks in turn.
- for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
- be = BBToVisit.end(); bi != be; ++bi) {
- recurseBasicBlock(*bi);
- // If at least one block was finished, break because iterator may be
- // invalid.
- if (BBToVisit.size() < size) break;
- }
-
- // If there was not a single block resolved, make some assumptions.
- if (BBToVisit.size() == size) {
- bool found = false;
- for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
- (BBI != BBE) && (!found); ++BBI) {
- BasicBlock *BB = *BBI;
-      // Try each predecessor to see whether it can be assumed.
- for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- (bbi != bbe) && (!found); ++bbi) {
- Edge e = getEdge(*bbi,BB);
- double w = getEdgeWeight(e);
- // Check that edge from predecessor is still free.
- if (w == MissingValue) {
-          // Check if there is a cycle from this block to the predecessor.
- Path P;
- const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
- if (Dest != *bbi) {
-            // If there is no cycle, just set the edge weight to 0.
- EdgeInformation[&F][e] = 0;
- DEBUG(dbgs() << "Assuming edge weight: ");
- printEdgeWeight(e);
- found = true;
- }
- }
- }
- }
- if (!found) {
- cleanup = true;
- DEBUG(dbgs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
- }
- }
- }
-  // In case there was no safe way to assume edges, as a last measure set
-  // _everything_ to zero.
- if (cleanup) {
- FunctionInformation[&F] = 0;
- BlockInformation[&F].clear();
- EdgeInformation[&F].clear();
- for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
- const BasicBlock *BB = &(*FI);
- BlockInformation[&F][BB] = 0;
- const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
- if (predi == prede) {
- Edge e = getEdge(0,BB);
- setEdgeWeight(e,0);
- }
- for (;predi != prede; ++predi) {
- Edge e = getEdge(*predi,BB);
- setEdgeWeight(e,0);
- }
- succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
- if (succi == succe) {
- Edge e = getEdge(BB,0);
- setEdgeWeight(e,0);
- }
- for (;succi != succe; ++succi) {
-        Edge e = getEdge(BB,*succi);
- setEdgeWeight(e,0);
- }
- }
- }
-
- return false;
-}
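
The driver above is an instance of a common fixed-point pattern: sweep the
unresolved set repeatedly, and when a full sweep makes no progress, make one
conservative assumption or zero everything out. A minimal sketch of that
control structure, with hypothetical resolve/assume callbacks standing in for
the pass's edge calculations:

#include <cstdio>
#include <functional>
#include <iterator>
#include <set>

// Sweep 'open' until it is empty. If a full sweep resolves nothing, let
// 'assume' unblock the iteration with one guess; if it cannot, drop
// everything (the equivalent of the 'cleanup' path above).
static void solveToFixedPoint(std::set<int> &open,
                              const std::function<bool(int)> &resolve,
                              const std::function<bool()> &assume) {
  while (!open.empty()) {
    size_t before = open.size();
    for (auto it = open.begin(); it != open.end();)
      it = resolve(*it) ? open.erase(it) : std::next(it);
    if (open.size() == before && !assume()) {
      open.clear();
      return;
    }
  }
}

int main() {
  std::set<int> open = {1, 2, 3};
  int known = 0; // item n becomes resolvable once n-1 is known
  solveToFixedPoint(
      open,
      [&](int n) { return n == known + 1 ? (++known, true) : false; },
      [] { return false; });
  std::printf("unresolved: %zu\n", open.size()); // prints: unresolved: 0
}
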
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
deleted file mode 100644
index 9626a48..0000000
--- a/lib/Analysis/ProfileInfo.cpp
+++ /dev/null
@@ -1,1079 +0,0 @@
-//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the abstract ProfileInfo interface, and the default
-// "no profile" implementation.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-info"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include <limits>
-#include <queue>
-#include <set>
-using namespace llvm;
-
-namespace llvm {
- template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
-}
-
-// Register the ProfileInfo interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
-
-namespace llvm {
-
-template <>
-ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
-template <>
-ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
-
-template <>
-ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
- MachineProfile = 0;
-}
-template <>
-ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
- if (MachineProfile) delete MachineProfile;
-}
-
-template<>
-char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
-
-template<>
-const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
-
-template<> const
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
-
-template<> double
-ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
- std::map<const Function*, BlockCounts>::iterator J =
- BlockInformation.find(BB->getParent());
- if (J != BlockInformation.end()) {
- BlockCounts::iterator I = J->second.find(BB);
- if (I != J->second.end())
- return I->second;
- }
-
- double Count = MissingValue;
-
- const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-
- // Are there zero predecessors of this block?
- if (PI == PE) {
- Edge e = getEdge(0, BB);
- Count = getEdgeWeight(e);
- } else {
- // Otherwise, if there are predecessors, the execution count of this block is
- // the sum of the edge frequencies from the incoming edges.
- std::set<const BasicBlock*> ProcessedPreds;
- Count = 0;
- for (; PI != PE; ++PI) {
- const BasicBlock *P = *PI;
- if (ProcessedPreds.insert(P).second) {
- double w = getEdgeWeight(getEdge(P, BB));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
- }
- }
- }
-
- // If the predecessors did not suffice to get block weight, try successors.
- if (Count == MissingValue) {
-
- succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
-
- // Are there zero successors of this block?
- if (SI == SE) {
- Edge e = getEdge(BB,0);
- Count = getEdgeWeight(e);
- } else {
- std::set<const BasicBlock*> ProcessedSuccs;
- Count = 0;
- for (; SI != SE; ++SI)
- if (ProcessedSuccs.insert(*SI).second) {
- double w = getEdgeWeight(getEdge(BB, *SI));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
- }
- }
- }
-
- if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
- return Count;
-}
-
-template<>
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::
- getExecutionCount(const MachineBasicBlock *MBB) {
- std::map<const MachineFunction*, BlockCounts>::iterator J =
- BlockInformation.find(MBB->getParent());
- if (J != BlockInformation.end()) {
- BlockCounts::iterator I = J->second.find(MBB);
- if (I != J->second.end())
- return I->second;
- }
-
- return MissingValue;
-}
-
-template<>
-double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
- std::map<const Function*, double>::iterator J =
- FunctionInformation.find(F);
- if (J != FunctionInformation.end())
- return J->second;
-
-  // isDeclaration() is checked here and not at the start of the function to
-  // allow functions without a body to still have an execution count.
- if (F->isDeclaration()) return MissingValue;
-
- double Count = getExecutionCount(&F->getEntryBlock());
- if (Count != MissingValue) FunctionInformation[F] = Count;
- return Count;
-}
-
-template<>
-double ProfileInfoT<MachineFunction, MachineBasicBlock>::
- getExecutionCount(const MachineFunction *MF) {
- std::map<const MachineFunction*, double>::iterator J =
- FunctionInformation.find(MF);
- if (J != FunctionInformation.end())
- return J->second;
-
- double Count = getExecutionCount(&MF->front());
- if (Count != MissingValue) FunctionInformation[MF] = Count;
- return Count;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- setExecutionCount(const BasicBlock *BB, double w) {
- DEBUG(dbgs() << "Creating Block " << BB->getName()
- << " (weight: " << format("%.20g",w) << ")\n");
- BlockInformation[BB->getParent()][BB] = w;
-}
-
-template<>
-void ProfileInfoT<MachineFunction, MachineBasicBlock>::
- setExecutionCount(const MachineBasicBlock *MBB, double w) {
- DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
- << " (weight: " << format("%.20g",w) << ")\n");
- BlockInformation[MBB->getParent()][MBB] = w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
- double oldw = getEdgeWeight(e);
- assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
- DEBUG(dbgs() << "Adding to Edge " << e
- << " (new weight: " << format("%.20g",oldw + w) << ")\n");
- EdgeInformation[getFunction(e)][e] = oldw + w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- addExecutionCount(const BasicBlock *BB, double w) {
- double oldw = getExecutionCount(BB);
- assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
- DEBUG(dbgs() << "Adding to Block " << BB->getName()
- << " (new weight: " << format("%.20g",oldw + w) << ")\n");
- BlockInformation[BB->getParent()][BB] = oldw + w;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
- std::map<const Function*, BlockCounts>::iterator J =
- BlockInformation.find(BB->getParent());
- if (J == BlockInformation.end()) return;
-
- DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
- J->second.erase(BB);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(getFunction(e));
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Deleting" << e << "\n");
- J->second.erase(e);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- replaceEdge(const Edge &oldedge, const Edge &newedge) {
- double w;
- if ((w = getEdgeWeight(newedge)) == MissingValue) {
- w = getEdgeWeight(oldedge);
- DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
- } else {
- w += getEdgeWeight(oldedge);
- DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
- }
- setEdgeWeight(newedge,w);
- removeEdge(oldedge);
-}
-
-template<>
-const BasicBlock *ProfileInfoT<Function,BasicBlock>::
- GetPath(const BasicBlock *Src, const BasicBlock *Dest,
- Path &P, unsigned Mode) {
- const BasicBlock *BB = 0;
- bool hasFoundPath = false;
-
- std::queue<const BasicBlock *> BFS;
- BFS.push(Src);
-
- while(BFS.size() && !hasFoundPath) {
- BB = BFS.front();
- BFS.pop();
-
- succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
- if (Succ == End) {
- P[(const BasicBlock*)0] = BB;
- if (Mode & GetPathToExit) {
- hasFoundPath = true;
- BB = 0;
- }
- }
- for(;Succ != End; ++Succ) {
- if (P.find(*Succ) != P.end()) continue;
- Edge e = getEdge(BB,*Succ);
- if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
- P[*Succ] = BB;
- BFS.push(*Succ);
- if ((Mode & GetPathToDest) && *Succ == Dest) {
- hasFoundPath = true;
- BB = *Succ;
- break;
- }
- if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
- hasFoundPath = true;
- BB = *Succ;
- break;
- }
- }
- }
-
- return BB;
-}
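
GetPath records, for every block it reaches, the block it was reached from;
callers then walk this parent map backwards from the destination. A
standalone sketch of the same pattern over a toy adjacency-list graph
(illustrative names, no LLVM types):

#include <cstdio>
#include <map>
#include <queue>
#include <vector>

using Graph = std::map<int, std::vector<int>>;

// BFS from 'src'; P maps each discovered node to its parent, so a path can
// be reconstructed by walking from 'dst' back to 'src'.
static std::vector<int> bfsPath(const Graph &G, int src, int dst) {
  std::map<int, int> P;
  std::queue<int> Q;
  Q.push(src);
  P[src] = src;
  while (!Q.empty()) {
    int n = Q.front(); Q.pop();
    if (n == dst) break;
    auto it = G.find(n);
    if (it == G.end()) continue;
    for (int s : it->second)
      if (!P.count(s)) { P[s] = n; Q.push(s); }
  }
  std::vector<int> path;
  if (!P.count(dst)) return path;       // destination unreachable
  for (int n = dst; n != src; n = P[n]) // walk the parents back to src
    path.insert(path.begin(), n);
  path.insert(path.begin(), src);
  return path;
}

int main() {
  Graph G = {{0, {1, 2}}, {1, {3}}, {2, {3}}, {3, {}}};
  for (int n : bfsPath(G, 0, 3)) std::printf("%d ", n); // prints: 0 1 3
  std::printf("\n");
}
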
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- divertFlow(const Edge &oldedge, const Edge &newedge) {
- DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
-
- // First check if the old edge was taken, if not, just delete it...
- if (getEdgeWeight(oldedge) == 0) {
- removeEdge(oldedge);
- return;
- }
-
- Path P;
- P[newedge.first] = 0;
- P[newedge.second] = newedge.first;
- const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
-
- double w = getEdgeWeight (oldedge);
- DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
- do {
- const BasicBlock *Parent = P.find(BB)->second;
- Edge e = getEdge(Parent,BB);
- double oldw = getEdgeWeight(e);
- double oldc = getExecutionCount(e.first);
- setEdgeWeight(e, w+oldw);
- if (Parent != oldedge.first) {
- setExecutionCount(e.first, w+oldc);
- }
- BB = Parent;
- } while (BB != newedge.first);
- removeEdge(oldedge);
-}
-
-/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
-/// This checks all edges of the function the blocks reside in and replaces the
-/// occurrences of RmBB with DestBB.
-template<>
-void ProfileInfoT<Function,BasicBlock>::
- replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
- DEBUG(dbgs() << "Replacing " << RmBB->getName()
- << " with " << DestBB->getName() << "\n");
- const Function *F = DestBB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- Edge e, newedge;
- bool erasededge = false;
- EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
- while(I != E) {
- e = (I++)->first;
- bool foundedge = false; bool eraseedge = false;
- if (e.first == RmBB) {
- if (e.second == DestBB) {
- eraseedge = true;
- } else {
- newedge = getEdge(DestBB, e.second);
- foundedge = true;
- }
- }
- if (e.second == RmBB) {
- if (e.first == DestBB) {
- eraseedge = true;
- } else {
- newedge = getEdge(e.first, DestBB);
- foundedge = true;
- }
- }
- if (foundedge) {
- replaceEdge(e, newedge);
- }
- if (eraseedge) {
- if (erasededge) {
- Edge newedge = getEdge(DestBB, DestBB);
- replaceEdge(e, newedge);
- } else {
- removeEdge(e);
- erasededge = true;
- }
- }
- }
-}
-
-/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
-/// Since it is possible that there is more than one edge in the CFG from
-/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
- const BasicBlock *SecondBB,
- const BasicBlock *NewBB,
- bool MergeIdenticalEdges) {
- const Function *F = FirstBB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- // Generate edges and read current weight.
- Edge e = getEdge(FirstBB, SecondBB);
- Edge n1 = getEdge(FirstBB, NewBB);
- Edge n2 = getEdge(NewBB, SecondBB);
- EdgeWeights &ECs = J->second;
- double w = ECs[e];
-
- int succ_count = 0;
- if (!MergeIdenticalEdges) {
-    // First count the edges from FirstBB to SecondBB; if there is more than
-    // one, only slice out a proportional part for NewBB.
- for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
- BBI != BBE; ++BBI) {
- if (*BBI == SecondBB) succ_count++;
- }
- // When the NewBB is completely new, increment the count by one so that
- // the counts are properly distributed.
- if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
- } else {
- // When the edges are merged anyway, then redirect all flow.
- succ_count = 1;
- }
-
- // We know now how many edges there are from FirstBB to SecondBB, reroute a
- // proportional part of the edge weight over NewBB.
- double neww = floor(w / succ_count);
- ECs[n1] += neww;
- ECs[n2] += neww;
- BlockInformation[F][NewBB] += neww;
- if (succ_count == 1) {
- ECs.erase(e);
- } else {
- ECs[e] -= neww;
- }
-}
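
As a worked example of the proportional split above: assume two parallel
FirstBB->SecondBB edges, a NewBB with no execution count yet, and
MergeIdenticalEdges off. Then succ_count ends up as 3 and only a third of the
weight is rerouted over NewBB. A minimal numeric sketch (made-up values):

#include <cmath>
#include <cstdio>

int main() {
  double w = 90;          // weight currently on the FirstBB->SecondBB edge
  int parallelEdges = 2;  // identical CFG edges from FirstBB to SecondBB
  int succCount = parallelEdges + 1; // +1 because NewBB had no count yet
  double neww = std::floor(w / succCount);
  std::printf("rerouted over NewBB: %g, left on the old edge: %g\n",
              neww, w - neww); // prints 30 and 60
}
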
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
- const BasicBlock* New) {
- const Function *F = Old->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
-
- std::set<Edge> Edges;
- for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
- ewi != ewe; ++ewi) {
- Edge old = ewi->first;
- if (old.first == Old) {
- Edges.insert(old);
- }
- }
- for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
- EI != EE; ++EI) {
- Edge newedge = getEdge(New, EI->second);
- replaceEdge(*EI, newedge);
- }
-
- double w = getExecutionCount(Old);
- setEdgeWeight(getEdge(Old, New), w);
- setExecutionCount(New, w);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
- const BasicBlock* NewBB,
- BasicBlock *const *Preds,
- unsigned NumPreds) {
- const Function *F = BB->getParent();
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(F);
- if (J == EdgeInformation.end()) return;
-
- DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
- << " to " << NewBB->getName() << "\n");
-
- // Collect weight that was redirected over NewBB.
- double newweight = 0;
-
- std::set<const BasicBlock *> ProcessedPreds;
-  // For all requested predecessors.
- for (unsigned pred = 0; pred < NumPreds; ++pred) {
- const BasicBlock * Pred = Preds[pred];
- if (ProcessedPreds.insert(Pred).second) {
- // Create edges and read old weight.
- Edge oldedge = getEdge(Pred, BB);
- Edge newedge = getEdge(Pred, NewBB);
-
- // Remember how much weight was redirected.
- newweight += getEdgeWeight(oldedge);
-
- replaceEdge(oldedge,newedge);
- }
- }
-
- Edge newedge = getEdge(NewBB,BB);
- setEdgeWeight(newedge, newweight);
- setExecutionCount(NewBB, newweight);
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
- const Function *New) {
- DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
- << New->getName() << "\n");
- std::map<const Function*, EdgeWeights>::iterator J =
- EdgeInformation.find(Old);
- if(J != EdgeInformation.end()) {
- EdgeInformation[New] = J->second;
- }
- EdgeInformation.erase(Old);
- BlockInformation.erase(Old);
- FunctionInformation.erase(Old);
-}
-
-static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
- ProfileInfo::Edge &tocalc, unsigned &uncalc) {
- if (w == ProfileInfo::MissingValue) {
- tocalc = edge;
- uncalc++;
- return 0;
- } else {
- return w;
- }
-}
-
-template<>
-bool ProfileInfoT<Function,BasicBlock>::
- CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
- bool assumeEmptySelf) {
- Edge edgetocalc;
- unsigned uncalculated = 0;
-
-  // Collect weights of all incoming and outgoing edges; remember edges that
-  // have no value.
- double incount = 0;
- SmallSet<const BasicBlock*,8> pred_visited;
- const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi==bbe) {
- Edge e = getEdge(0,BB);
- incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
- }
- for (;bbi != bbe; ++bbi) {
- if (pred_visited.insert(*bbi)) {
- Edge e = getEdge(*bbi,BB);
- incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
- }
- }
-
- double outcount = 0;
- SmallSet<const BasicBlock*,8> succ_visited;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi==sbbe) {
- Edge e = getEdge(BB,0);
- if (getEdgeWeight(e) == MissingValue) {
- double w = getExecutionCount(BB);
- if (w != MissingValue) {
- setEdgeWeight(e,w);
- removed = e;
- }
- }
- outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
- }
- for (;sbbi != sbbe; ++sbbi) {
- if (succ_visited.insert(*sbbi)) {
- Edge e = getEdge(BB,*sbbi);
- outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
- }
- }
-
-  // If exactly one edge weight was missing, calculate it and remove it from
-  // the spanning tree.
- if (uncalculated == 0 ) {
- return true;
- } else
- if (uncalculated == 1) {
- if (incount < outcount) {
- EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
- } else {
- EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
- }
- DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
- << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
- removed = edgetocalc;
- return true;
- } else
- if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
- setEdgeWeight(edgetocalc, incount * 10);
- removed = edgetocalc;
- return true;
- } else {
- return false;
- }
-}
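
CalculateMissingEdge relies on Kirchhoff-style flow conservation: at every
block, the incoming edge weights must sum to the outgoing ones, so a single
unknown edge is determined by the difference of the two sums. A small
standalone sketch of that rule (illustrative names and sentinel):

#include <cmath>
#include <cstdio>
#include <vector>

static const double Missing = -1;

// Solve the one missing weight at a block from sum(in) == sum(out).
// Returns Missing unless exactly one value is unknown.
static double solveMissing(const std::vector<double> &in,
                           const std::vector<double> &out) {
  double inSum = 0, outSum = 0;
  int unknown = 0;
  for (double w : in)  { if (w == Missing) ++unknown; else inSum  += w; }
  for (double w : out) { if (w == Missing) ++unknown; else outSum += w; }
  if (unknown != 1) return Missing;
  return std::fabs(inSum - outSum);
}

int main() {
  // in: 5 and 7; out: 4 and one unknown -> the unknown edge must carry 8.
  std::printf("%g\n", solveMissing({5, 7}, {4, Missing}));
}
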
-
-static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
- double w = PI->getEdgeWeight(e);
- if (w != ProfileInfo::MissingValue) {
- calcw += w;
- } else {
- misscount.insert(e);
- }
-}
-
-template<>
-bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
- double inWeight = 0;
- std::set<Edge> inMissing;
- std::set<const BasicBlock*> ProcessedPreds;
- const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi == bbe) {
- readEdge(this,getEdge(0,BB),inWeight,inMissing);
- }
- for( ; bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
- }
- }
-
- double outWeight = 0;
- std::set<Edge> outMissing;
- std::set<const BasicBlock*> ProcessedSuccs;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi == sbbe)
- readEdge(this,getEdge(BB,0),outWeight,outMissing);
- for ( ; sbbi != sbbe; ++sbbi ) {
- if (ProcessedSuccs.insert(*sbbi).second) {
- readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
- }
- }
-
- double share;
- std::set<Edge>::iterator ei,ee;
- if (inMissing.size() == 0 && outMissing.size() > 0) {
- ei = outMissing.begin();
- ee = outMissing.end();
- share = inWeight/outMissing.size();
- setExecutionCount(BB,inWeight);
- } else
- if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
- ei = inMissing.begin();
- ee = inMissing.end();
- share = 0;
- setExecutionCount(BB,0);
- } else
- if (inMissing.size() == 0 && outMissing.size() == 0) {
- setExecutionCount(BB,outWeight);
- return true;
- } else {
- return false;
- }
- for ( ; ei != ee; ++ei ) {
- setEdgeWeight(*ei,share);
- }
- return true;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
-// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
-// for (Function::const_iterator FI = F->begin(), FE = F->end();
-// FI != FE; ++FI) {
-// const BasicBlock* BB = &(*FI);
-// {
-// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
-// if (NBB == End) {
-// setEdgeWeight(getEdge(0,BB),0);
-// }
-// for(;NBB != End; ++NBB) {
-// setEdgeWeight(getEdge(*NBB,BB),0);
-// }
-// }
-// {
-// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-// if (NBB == End) {
-// setEdgeWeight(getEdge(0,BB),0);
-// }
-// for(;NBB != End; ++NBB) {
-// setEdgeWeight(getEdge(*NBB,BB),0);
-// }
-// }
-// }
-// return;
-// }
- // The set of BasicBlocks that are still unvisited.
- std::set<const BasicBlock*> Unvisited;
-
- // The set of return edges (Edges with no successors).
- std::set<Edge> ReturnEdges;
- double ReturnWeight = 0;
-
- // First iterate over the whole function and collect:
- // 1) The blocks in this function in the Unvisited set.
- // 2) The return edges in the ReturnEdges set.
- // 3) The flow that is leaving the function already via return edges.
-
- // Data structure for searching the function.
- std::queue<const BasicBlock *> BFS;
- const BasicBlock *BB = &(F->getEntryBlock());
- BFS.push(BB);
- Unvisited.insert(BB);
-
- while (BFS.size()) {
- BB = BFS.front(); BFS.pop();
- succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- if (NBB == End) {
- Edge e = getEdge(BB,0);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- // If the return edge has no value, try to read value from block.
- double bw = getExecutionCount(BB);
- if (bw != MissingValue) {
- setEdgeWeight(e,bw);
- ReturnWeight += bw;
- } else {
- // If both return edge and block provide no value, collect edge.
- ReturnEdges.insert(e);
- }
- } else {
- // If the return edge has a proper value, collect it.
- ReturnWeight += w;
- }
- }
- for (;NBB != End; ++NBB) {
- if (Unvisited.insert(*NBB).second) {
- BFS.push(*NBB);
- }
- }
- }
-
- while (Unvisited.size() > 0) {
- unsigned oldUnvisitedCount = Unvisited.size();
- bool FoundPath = false;
-
- // If there is only one edge left, calculate it.
- if (ReturnEdges.size() == 1) {
- ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
-
- Edge e = *ReturnEdges.begin();
- setEdgeWeight(e,ReturnWeight);
- setExecutionCount(e.first,ReturnWeight);
-
- Unvisited.erase(e.first);
- ReturnEdges.erase(e);
- continue;
- }
-
-    // Calculate all blocks where only one edge is missing; this may also
-    // resolve further return edges.
- std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- Edge e;
- if(CalculateMissingEdge(BB,e,true)) {
- if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
- setExecutionCount(BB,getExecutionCount(BB));
- }
- Unvisited.erase(BB);
- if (e.first != 0 && e.second == 0) {
- ReturnEdges.erase(e);
- ReturnWeight += getEdgeWeight(e);
- }
- }
- }
- if (oldUnvisitedCount > Unvisited.size()) continue;
-
- // Estimate edge weights by dividing the flow proportionally.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest = 0;
- bool AllEdgesHaveSameReturn = true;
-      // Check each successor; these must all end up in the same (or an
-      // empty) return block, otherwise it is dangerous to do an estimation
-      // on them.
- for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
- Succ != End; ++Succ) {
- Path P;
- GetPath(*Succ, 0, P, GetPathToExit);
- if (Dest && Dest != P[(const BasicBlock*)0]) {
- AllEdgesHaveSameReturn = false;
- }
- Dest = P[(const BasicBlock*)0];
- }
- if (AllEdgesHaveSameReturn) {
- if(EstimateMissingEdges(BB)) {
- Unvisited.erase(BB);
- break;
- }
- }
- }
- if (oldUnvisitedCount > Unvisited.size()) continue;
-
-    // Check if there is a path to a block that has a known value and
-    // redirect the flow accordingly.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- // Fetch path.
- const BasicBlock *BB = *FI; ++FI;
- Path P;
- const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
-
- // Calculate incoming flow.
- double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- invalid++;
- } else {
-            // If the path already contains this predecessor, the edge is a
-            // backedge; do not count it as missing.
- if (P.find(*NBB) == P.end())
- inmissing++;
- }
- incount++;
- }
- }
- if (inmissing == incount) continue;
- if (invalid == 0) continue;
-
- // Subtract (already) outgoing flow.
- Processed.clear();
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(BB, *NBB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw -= ew;
- }
- }
- }
- if (iw < 0) continue;
-
-      // Check whether the receiving end of the path can handle the flow.
- double ow = getExecutionCount(Dest);
- Processed.clear();
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(BB, *NBB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- ow -= ew;
- }
- }
- }
- if (ow < 0) continue;
-
- // Determine how much flow shall be used.
- double ew = getEdgeWeight(getEdge(P[Dest],Dest));
- if (ew != MissingValue) {
- ew = ew<ow?ew:ow;
- ew = ew<iw?ew:iw;
- } else {
- if (inmissing == 0)
- ew = iw;
- }
-
- // Create flow.
- if (ew != MissingValue) {
- do {
- Edge e = getEdge(P[Dest],Dest);
- if (getEdgeWeight(e) == MissingValue) {
- setEdgeWeight(e,ew);
- FoundPath = true;
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Calculate a block with self loop.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
- bool SelfEdgeFound = false;
- for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
- NBB != End; ++NBB) {
- if (*NBB == BB) {
- SelfEdgeFound = true;
- break;
- }
- }
- if (SelfEdgeFound) {
- Edge e = getEdge(BB,BB);
- if (getEdgeWeight(e) == MissingValue) {
- double iw = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- }
- }
- }
- setEdgeWeight(e,iw * 10);
- FoundPath = true;
- }
- }
- }
- if (FoundPath) continue;
-
- // Determine backedges, set them to zero.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
- const BasicBlock *Dest = 0;
- Path P;
- bool BackEdgeFound = false;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
- if (Dest == *NBB) {
- BackEdgeFound = true;
- break;
- }
- }
- if (BackEdgeFound) {
- Edge e = getEdge(Dest,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- }
- do {
- Edge e = getEdge(P[Dest], Dest);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Channel flow to return block.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
-
- Path P;
- const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
- Dest = P[(const BasicBlock*)0];
- if (!Dest) continue;
-
- if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
- // Calculate incoming flow.
- double iw = 0;
- std::set<const BasicBlock *> Processed;
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- if (Processed.insert(*NBB).second) {
- Edge e = getEdge(*NBB, BB);
- double ew = getEdgeWeight(e);
- if (ew != MissingValue) {
- iw += ew;
- }
- }
- }
- do {
- Edge e = getEdge(P[Dest], Dest);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,iw);
- FoundPath = true;
- } else {
- assert(0 && "Edge should not have value already!");
- }
- Dest = P[Dest];
- } while (Dest != BB);
- }
- }
- if (FoundPath) continue;
-
- // Speculatively set edges to zero.
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE && !FoundPath) {
- const BasicBlock *BB = *FI; ++FI;
-
- for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
- NBB != End; ++NBB) {
- Edge e = getEdge(*NBB,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- setEdgeWeight(e,0);
- FoundPath = true;
- break;
- }
- }
- }
- if (FoundPath) continue;
-
- errs() << "{";
- FI = Unvisited.begin(), FE = Unvisited.end();
- while(FI != FE) {
- const BasicBlock *BB = *FI; ++FI;
- dbgs() << BB->getName();
- if (FI != FE)
- dbgs() << ",";
- }
- errs() << "}";
-
- errs() << "ASSERT: could not repair function";
- assert(0 && "could not repair function");
- }
-
- EdgeWeights J = EdgeInformation[F];
- for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
- Edge e = EI->first;
-
- bool SuccFound = false;
- if (e.first != 0) {
- succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
- if (NBB == End) {
- if (0 == e.second) {
- SuccFound = true;
- }
- }
- for (;NBB != End; ++NBB) {
- if (*NBB == e.second) {
- SuccFound = true;
- break;
- }
- }
- if (!SuccFound) {
- removeEdge(e);
- }
- }
- }
-}
-
-raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
- return O << MF->getFunction()->getName() << "(MF)";
-}
-
-raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
- return O << MBB->getBasicBlock()->getName() << "(MB)";
-}
-
-raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
- O << "(";
-
- if (E.first)
- O << E.first;
- else
- O << "0";
-
- O << ",";
-
- if (E.second)
- O << E.second;
- else
- O << "0";
-
- return O << ")";
-}
-
-} // namespace llvm
-
-//===----------------------------------------------------------------------===//
-// NoProfile ProfileInfo implementation
-//
-
-namespace {
- struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
- static char ID; // Class identification, replacement for typeinfo
- NoProfileInfo() : ImmutablePass(ID) {
- initializeNoProfileInfoPass(*PassRegistry::getPassRegistry());
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- virtual const char *getPassName() const {
- return "NoProfileInfo";
- }
- };
-} // End of anonymous namespace
-
-char NoProfileInfo::ID = 0;
-// Register this pass...
-INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
- "No Profile Information", false, true, true)
-
-ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
deleted file mode 100644
index f1f3e94..0000000
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-//===- ProfileInfoLoader.cpp - Load profile information from disk --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The ProfileInfoLoader class is used to load and represent profiling
-// information read in from the dump file.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-// ByteSwap - Byteswap 'Var' if 'Really' is true.
-//
-static inline unsigned ByteSwap(unsigned Var, bool Really) {
- if (!Really) return Var;
- return ((Var & (255U<< 0U)) << 24U) |
- ((Var & (255U<< 8U)) << 8U) |
- ((Var & (255U<<16U)) >> 8U) |
- ((Var & (255U<<24U)) >> 24U);
-}
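
The endianness trick used with ByteSwap works because every packet type
constant fits in the low byte; if a reader sees those bits as zero, the file
was written with the opposite byte order. A self-contained sketch of
detect-and-swap (the packet type value is made up):

#include <cstdio>

static unsigned byteSwap(unsigned v) {
  return ((v & 0x000000FFu) << 24) | ((v & 0x0000FF00u) << 8) |
         ((v & 0x00FF0000u) >> 8)  | ((v & 0xFF000000u) >> 24);
}

int main() {
  unsigned onDisk = byteSwap(3);       // as if written by the other endianness
  bool shouldSwap = (char)onDisk == 0; // low byte zero -> byte-order mismatch
  unsigned type = shouldSwap ? byteSwap(onDisk) : onDisk;
  std::printf("swap=%d type=%u\n", shouldSwap, type); // prints: swap=1 type=3
}
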
-
-static unsigned AddCounts(unsigned A, unsigned B) {
- // If either value is undefined, use the other.
- if (A == ProfileInfoLoader::Uncounted) return B;
- if (B == ProfileInfoLoader::Uncounted) return A;
- return A + B;
-}
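
AddCounts treats Uncounted as "no data observed" rather than as a huge count,
so merging several profile runs keeps whichever run actually saw the counter.
A tiny sketch of that merge, assuming the same ~0U sentinel:

#include <cstdio>

static const unsigned Uncounted = ~0U;

static unsigned addCounts(unsigned a, unsigned b) {
  if (a == Uncounted) return b; // left run had no data: take the right value
  if (b == Uncounted) return a; // right run had no data: take the left value
  return a + b;                 // both runs observed the counter: accumulate
}

int main() {
  std::printf("%u\n", addCounts(Uncounted, 7)); // 7, not a saturated sum
  std::printf("%u\n", addCounts(5, 7));         // 12
}
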
-
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
- bool ShouldByteSwap,
- std::vector<unsigned> &Data) {
- // Read the number of entries...
- unsigned NumEntries;
- if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
- errs() << ToolName << ": data packet truncated!\n";
- perror(0);
- exit(1);
- }
- NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
-
- // Read the counts...
- std::vector<unsigned> TempSpace(NumEntries);
-
- // Read in the block of data...
- if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
- errs() << ToolName << ": data packet truncated!\n";
- perror(0);
- exit(1);
- }
-
-  // Make sure we have enough space... The space is initialised to -1 to
-  // facilitate the loading of missing values for OptimalEdgeProfiling.
- if (Data.size() < NumEntries)
- Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
-
- // Accumulate the data we just read into the data.
- if (!ShouldByteSwap) {
- for (unsigned i = 0; i != NumEntries; ++i) {
- Data[i] = AddCounts(TempSpace[i], Data[i]);
- }
- } else {
- for (unsigned i = 0; i != NumEntries; ++i) {
- Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
- }
- }
-}
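
The payload layout ReadProfilingBlock expects is just a 32-bit entry count
followed by that many 32-bit counters, in the writer's byte order. A sketch
that writes and re-reads one such block in host byte order (file name and
values are made up, and error handling is omitted):

#include <cstdio>
#include <vector>

int main() {
  const char *path = "counts.bin"; // illustrative scratch file
  std::vector<unsigned> counts = {1, 2, 3};

  FILE *out = std::fopen(path, "wb");
  unsigned n = (unsigned)counts.size();
  std::fwrite(&n, sizeof n, 1, out);                    // entry count
  std::fwrite(counts.data(), sizeof(unsigned), n, out); // the counters
  std::fclose(out);

  FILE *in = std::fopen(path, "rb");
  unsigned m = 0;
  std::fread(&m, sizeof m, 1, in);
  std::vector<unsigned> back(m);
  std::fread(back.data(), sizeof(unsigned), m, in);
  std::fclose(in);
  std::printf("%u entries, first=%u\n", m, back[0]); // 3 entries, first=1
}
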
-
-const unsigned ProfileInfoLoader::Uncounted = ~0U;
-
-// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
-// program if the file is invalid or broken.
-//
-ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
- const std::string &Filename)
- : Filename(Filename) {
- FILE *F = fopen(Filename.c_str(), "rb");
- if (F == 0) {
- errs() << ToolName << ": Error opening '" << Filename << "': ";
- perror(0);
- exit(1);
- }
-
- // Keep reading packets until we run out of them.
- unsigned PacketType;
- while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
- // If the low eight bits of the packet are zero, we must be dealing with an
- // endianness mismatch. Byteswap all words read from the profiling
- // information.
- bool ShouldByteSwap = (char)PacketType == 0;
- PacketType = ByteSwap(PacketType, ShouldByteSwap);
-
- switch (PacketType) {
- case ArgumentInfo: {
- unsigned ArgLength;
- if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
- errs() << ToolName << ": arguments packet truncated!\n";
- perror(0);
- exit(1);
- }
- ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
-
- // Read in the arguments...
- std::vector<char> Chars(ArgLength+4);
-
- if (ArgLength)
- if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
- errs() << ToolName << ": arguments packet truncated!\n";
- perror(0);
- exit(1);
- }
- CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
- break;
- }
-
- case FunctionInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
- break;
-
- case BlockInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
- break;
-
- case EdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
- break;
-
- case OptEdgeInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
- break;
-
- case BBTraceInfo:
- ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
- break;
-
- default:
- errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
- exit(1);
- }
- }
-
- fclose(F);
-}
-
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
deleted file mode 100644
index 346f8d6..0000000
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete implementation of profiling information that
-// loads the information from a profile dump file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-
-static cl::opt<std::string>
-ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
- cl::value_desc("filename"),
- cl::desc("Profile file loaded by -profile-loader"));
-
-namespace {
- class LoaderPass : public ModulePass, public ProfileInfo {
- std::string Filename;
- std::set<Edge> SpanningTree;
- std::set<const BasicBlock*> BBisUnvisited;
- unsigned ReadCount;
- public:
- static char ID; // Class identification, replacement for typeinfo
- explicit LoaderPass(const std::string &filename = "")
- : ModulePass(ID), Filename(filename) {
- initializeLoaderPassPass(*PassRegistry::getPassRegistry());
- if (filename.empty()) Filename = ProfileInfoFilename;
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
-
- virtual const char *getPassName() const {
- return "Profiling information loader";
- }
-
-    // recurseBasicBlock() - Calculates the edge weights for as many basic
-    // blocks as possible.
- virtual void recurseBasicBlock(const BasicBlock *BB);
- virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
- virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
- if (PI == &ProfileInfo::ID)
- return (ProfileInfo*)this;
- return this;
- }
-
- /// run - Load the profile information from the specified file.
- virtual bool runOnModule(Module &M);
- };
-} // End of anonymous namespace
-
-char LoaderPass::ID = 0;
-INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
- "Load profile information from llvmprof.out", false, true, false)
-
-char &llvm::ProfileLoaderPassID = LoaderPass::ID;
-
-ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
-
-/// createProfileLoaderPass - This function returns a Pass that loads the
-/// profiling information for the module from the specified filename, making it
-/// available to the optimizers.
-Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
- return new LoaderPass(Filename);
-}
-
-void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
- unsigned &uncalc, double &count) {
- double w;
- if ((w = getEdgeWeight(edge)) == MissingValue) {
- tocalc = edge;
- uncalc++;
- } else {
- count+=w;
- }
-}
-
-// recurseBasicBlock - Visits all neighbours of a block and then tries to
-// calculate the missing edge values.
-void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
-
- // break recursion if already visited
- if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
- BBisUnvisited.erase(BB);
- if (!BB) return;
-
- for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
- for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi) {
- recurseBasicBlock(*bbi);
- }
-
- Edge tocalc;
- if (CalculateMissingEdge(BB, tocalc)) {
- SpanningTree.erase(tocalc);
- }
-}
-
-void LoaderPass::readEdge(ProfileInfo::Edge e,
- std::vector<unsigned> &ECs) {
- if (ReadCount < ECs.size()) {
- double weight = ECs[ReadCount++];
- if (weight != ProfileInfoLoader::Uncounted) {
-      // Here the data realm changes from the unsigned of the file to the
-      // double of the ProfileInfo. This conversion is safe because we know
-      // that everything that is representable in unsigned is also
-      // representable in double.
- EdgeInformation[getFunction(e)][e] += (double)weight;
-
- DEBUG(dbgs() << "--Read Edge Counter for " << e
- << " (# "<< (ReadCount-1) << "): "
- << (unsigned)getEdgeWeight(e) << "\n");
- } else {
- // This happens only if reading optimal profiling information, not when
- // reading regular profiling information.
- SpanningTree.insert(e);
- }
- }
-}
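
The "safe conversion" comment above holds because a double has a 53-bit
significand, so every 32-bit unsigned counter is representable exactly. A
one-liner check, as a sketch:

#include <cassert>
#include <cstdio>

int main() {
  unsigned max = 0xFFFFFFFFu; // the largest 32-bit count a file can hold
  double d = (double)max;     // the ProfileInfo-side representation
  assert((unsigned)d == max); // round-trips exactly, since 2^32-1 < 2^53
  std::printf("%.1f\n", d);   // prints: 4294967295.0
}
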
-
-bool LoaderPass::runOnModule(Module &M) {
- ProfileInfoLoader PIL("profile-loader", Filename);
-
- EdgeInformation.clear();
- std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- readEdge(getEdge(0,&F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
- }
- }
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
- }
-
- Counters = PIL.getRawOptimalEdgeCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getName() << "\n");
- readEdge(getEdge(0,&F->getEntryBlock()), Counters);
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumSuccessors() == 0) {
- readEdge(getEdge(BB,0), Counters);
- }
- for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
- readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
- }
- }
- while (SpanningTree.size() > 0) {
-
- unsigned size = SpanningTree.size();
-
- BBisUnvisited.clear();
- for (std::set<Edge>::iterator ei = SpanningTree.begin(),
- ee = SpanningTree.end(); ei != ee; ++ei) {
- BBisUnvisited.insert(ei->first);
- BBisUnvisited.insert(ei->second);
- }
- while (BBisUnvisited.size() > 0) {
- recurseBasicBlock(*BBisUnvisited.begin());
- }
-
- if (SpanningTree.size() == size) {
- DEBUG(dbgs()<<"{");
- for (std::set<Edge>::iterator ei = SpanningTree.begin(),
- ee = SpanningTree.end(); ei != ee; ++ei) {
- DEBUG(dbgs()<< *ei <<",");
- }
- assert(0 && "No edge calculated!");
- }
-
- }
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- NumEdgesRead = ReadCount;
- }
-
- BlockInformation.clear();
- Counters = PIL.getRawBlockCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- if (ReadCount < Counters.size())
-          // Here the data realm changes from the unsigned of the file to the
-          // double of the ProfileInfo. This conversion is safe because we
-          // know that everything that is representable in unsigned is also
-          // representable in double.
- BlockInformation[F][BB] = (double)Counters[ReadCount++];
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- }
-
- FunctionInformation.clear();
- Counters = PIL.getRawFunctionCounts();
- if (Counters.size() > 0) {
- ReadCount = 0;
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration()) continue;
- if (ReadCount < Counters.size())
-      // Here the data realm changes from the unsigned of the file to the
-      // double of the ProfileInfo. This conversion is safe because we know
-      // that everything that is representable in unsigned is also
-      // representable in double.
- FunctionInformation[F] = (double)Counters[ReadCount++];
- }
- if (ReadCount != Counters.size()) {
- errs() << "WARNING: profile information is inconsistent with "
- << "the current program!\n";
- }
- }
-
- return false;
-}
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
deleted file mode 100644
index c8896de..0000000
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ /dev/null
@@ -1,383 +0,0 @@
-//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that checks profiling information for
-// plausibility.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-verifier"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-static cl::opt<bool,false>
-ProfileVerifierDisableAssertions("profile-verifier-noassert",
- cl::desc("Disable assertions"));
-
-namespace {
- template<class FType, class BType>
- class ProfileVerifierPassT : public FunctionPass {
-
- struct DetailedBlockInfo {
- const BType *BB;
- double BBWeight;
- double inWeight;
- int inCount;
- double outWeight;
- int outCount;
- };
-
- ProfileInfoT<FType, BType> *PI;
- std::set<const BType*> BBisVisited;
- std::set<const FType*> FisVisited;
- bool DisableAssertions;
-
-    // When debugging is enabled, the verifier prints a whole slew of debug
-    // information; otherwise it is just the assert. These are all the helper
-    // functions.
- bool PrintedDebugTree;
- std::set<const BType*> BBisPrinted;
- void debugEntry(DetailedBlockInfo*);
- void printDebugInfo(const BType *BB);
-
- public:
- static char ID; // Class identification, replacement for typeinfo
-
- explicit ProfileVerifierPassT () : FunctionPass(ID) {
- initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
- DisableAssertions = ProfileVerifierDisableAssertions;
- }
- explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
- DisableAssertions(da) {
- initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<ProfileInfoT<FType, BType> >();
- }
-
- const char *getPassName() const {
- return "Profiling information verifier";
- }
-
- /// run - Verify the profile information.
- bool runOnFunction(FType &F);
- void recurseBasicBlock(const BType*);
-
- bool exitReachable(const FType*);
- double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
- void CheckValue(bool, const char*, DetailedBlockInfo*);
- };
-
- typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
-
- if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
-
- double BBWeight = PI->getExecutionCount(BB);
- if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
- double inWeight = 0;
- int inCount = 0;
- std::set<const BType*> ProcessedPreds;
- for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
- dbgs() << "calculated in-edge " << E << ": "
- << format("%20.20g",EdgeWeight) << "\n";
- inWeight += EdgeWeight;
- inCount++;
- }
- }
- double outWeight = 0;
- int outCount = 0;
- std::set<const BType*> ProcessedSuccs;
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
- dbgs() << "calculated out-edge " << E << ": "
- << format("%20.20g",EdgeWeight) << "\n";
- outWeight += EdgeWeight;
- outCount++;
- }
- }
- dbgs() << "Block " << BB->getName() << " in "
- << BB->getParent()->getName() << ":"
- << "BBWeight=" << format("%20.20g",BBWeight) << ","
- << "inWeight=" << format("%20.20g",inWeight) << ","
- << "inCount=" << inCount << ","
- << "outWeight=" << format("%20.20g",outWeight) << ","
- << "outCount" << outCount << "\n";
-
- // mark as visited and recurse into subnodes
- BBisPrinted.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- printDebugInfo(*bbi);
- }
- }
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
- dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in "
- << DI->BB->getParent()->getName() << ":"
- << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
- << "inWeight=" << format("%20.20g",DI->inWeight) << ","
- << "inCount=" << DI->inCount << ","
- << "outWeight=" << format("%20.20g",DI->outWeight) << ","
- << "outCount=" << DI->outCount << "\n";
- if (!PrintedDebugTree) {
- PrintedDebugTree = true;
- printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
- }
- }
-
- // This compares A and B for equality.
- static bool Equals(double A, double B) {
- return A == B;
- }
-
-  // This checks if the function "exit" is reachable from a given function
-  // via calls; this is necessary to check if a profile is valid despite the
-  // counts not fitting exactly.
- template<class FType, class BType>
- bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
- if (!F) return false;
-
- if (FisVisited.count(F)) return false;
-
- FType *Exit = F->getParent()->getFunction("exit");
- if (Exit == F) {
- return true;
- }
-
- FisVisited.insert(F);
- bool exits = false;
- for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
- FType *F = CI->getCalledFunction();
- if (F) {
- exits |= exitReachable(F);
- } else {
- // This is a call to a pointer, all bets are off...
- exits = true;
- }
- if (exits) break;
- }
- }
- return exits;
- }
-
- #define ASSERTMESSAGE(M) \
- { dbgs() << "ASSERT:" << (M) << "\n"; \
- if (!DisableAssertions) assert(0 && (M)); }
-
- template<class FType, class BType>
- double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
- dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
- ASSERTMESSAGE("Edge has missing value");
- return 0;
- } else {
- if (EdgeWeight < 0) {
- dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
- ASSERTMESSAGE("Edge has negative value");
- }
- return EdgeWeight;
- }
- }
-
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
- const char *Message,
- DetailedBlockInfo *DI) {
- if (Error) {
- DEBUG(debugEntry(DI));
- dbgs() << "Block " << DI->BB->getName() << " in Function "
- << DI->BB->getParent()->getName() << ": ";
- ASSERTMESSAGE(Message);
- }
- return;
- }
-
- // This calculates the Information for a block and then recurses into the
- // successors.
- template<class FType, class BType>
- void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
-
- // Break the recursion by remembering all visited blocks.
- if (BBisVisited.find(BB) != BBisVisited.end()) return;
-
- // Use a data structure to store all the information, this can then be handed
- // to debug printers.
- DetailedBlockInfo DI;
- DI.BB = BB;
- DI.outCount = DI.inCount = 0;
- DI.inWeight = DI.outWeight = 0;
-
- // Read predecessors.
- std::set<const BType*> ProcessedPreds;
- const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
- // If there are none, check for (0,BB) edge.
- if (bpi == bpe) {
- DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
- DI.inCount++;
- }
- for (;bpi != bpe; ++bpi) {
- if (ProcessedPreds.insert(*bpi).second) {
- DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
- DI.inCount++;
- }
- }
-
- // Read successors.
- std::set<const BType*> ProcessedSuccs;
- succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-  // If there is a (0,BB) edge, consider it too. (This is done not only when
- // there are no successors, but every time; not every function contains
- // return blocks with no successors (think loop latch as return block)).
- double w = PI->getEdgeWeight(PI->getEdge(BB,0));
- if (w != ProfileInfoT<FType, BType>::MissingValue) {
- DI.outWeight += w;
- DI.outCount++;
- }
- for (;bbi != bbe; ++bbi) {
- if (ProcessedSuccs.insert(*bbi).second) {
- DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
- DI.outCount++;
- }
- }
-
- // Read block weight.
- DI.BBWeight = PI->getExecutionCount(BB);
- CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
- "BasicBlock has missing value", &DI);
- CheckValue(DI.BBWeight < 0,
- "BasicBlock has negative value", &DI);
-
- // Check if this block is a setjmp target.
- bool isSetJmpTarget = false;
- if (DI.outWeight > DI.inWeight) {
- for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FType *F = CI->getCalledFunction();
- if (F && (F->getName() == "_setjmp")) {
- isSetJmpTarget = true; break;
- }
- }
- }
- }
- // Check if this block is eventually reaching exit.
- bool isExitReachable = false;
- if (DI.inWeight > DI.outWeight) {
- for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FType *F = CI->getCalledFunction();
- if (F) {
- FisVisited.clear();
- isExitReachable |= exitReachable(F);
- } else {
- // This is a call to a pointer, all bets are off...
- isExitReachable = true;
- }
- if (isExitReachable) break;
- }
- }
- }
-
- if (DI.inCount > 0 && DI.outCount == 0) {
- // If this is a block with no successors.
- if (!isSetJmpTarget) {
- CheckValue(!Equals(DI.inWeight,DI.BBWeight),
- "inWeight and BBWeight do not match", &DI);
- }
- } else if (DI.inCount == 0 && DI.outCount > 0) {
- // If this is a block with no predecessors.
- if (!isExitReachable)
- CheckValue(!Equals(DI.BBWeight,DI.outWeight),
- "BBWeight and outWeight do not match", &DI);
- } else {
- // If this block has successors and predecessors.
- if (DI.inWeight > DI.outWeight && !isExitReachable)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "inWeight and outWeight do not match", &DI);
- if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "inWeight and outWeight do not match", &DI);
- }
-
-
-  // Mark this block as visited, recurse into successors.
- BBisVisited.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- recurseBasicBlock(*bbi);
- }
- }
-
- template<class FType, class BType>
- bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
- PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
- if (!PI)
- ASSERTMESSAGE("No ProfileInfo available");
-
- // Prepare global variables.
- PrintedDebugTree = false;
- BBisVisited.clear();
-
- // Fetch entry block and recurse into it.
- const BType *entry = &F.getEntryBlock();
- recurseBasicBlock(entry);
-
- if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
- ASSERTMESSAGE("Function count and entry block count do not match");
-
- return false;
- }
-
- template<class FType, class BType>
- char ProfileVerifierPassT<FType, BType>::ID = 0;
-}
-
-INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true)
-INITIALIZE_AG_DEPENDENCY(ProfileInfo)
-INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier",
- "Verify profiling information", false, true)
-
-namespace llvm {
- FunctionPass *createProfileVerifierPass() {
- return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
- }
-}
-
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 8577025..5635688 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -9,6 +9,7 @@
// Detects single entry single exit regions in the control flow graph.
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "region"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -17,12 +18,9 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-
-#define DEBUG_TYPE "region"
#include "llvm/Support/Debug.h"
-
-#include <set>
#include <algorithm>
+#include <set>
using namespace llvm;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index f5d095b..0a02f4e 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2590,55 +2590,39 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
}
-const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
+const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
- TD->getTypeAllocSize(AllocTy));
+ return getConstant(IntTy, TD->getTypeAllocSize(AllocTy));
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+ assert(Ty == IntTy && "Effective SCEV type doesn't match");
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
-const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
- Constant *C = ConstantExpr::getAlignOf(AllocTy);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-
-const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
+const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
+ StructType *STy,
unsigned FieldNo) {
// If we have DataLayout, we can bypass creating a target-independent
// constant expression and then folding it back into a ConstantInt.
// This is just a compile-time optimization.
- if (TD)
- return getConstant(TD->getIntPtrType(getContext()),
+ if (TD) {
+ return getConstant(IntTy,
TD->getStructLayout(STy)->getElementOffset(FieldNo));
+ }
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
- return getTruncateOrZeroExtend(getSCEV(C), Ty);
-}
-const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
- Constant *FieldNo) {
- Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
- C = Folded;
- Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+ Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2703,12 +2687,15 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {
Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {
assert(isSCEVable(Ty) && "Type is not SCEVable!");
- if (Ty->isIntegerTy())
+ if (Ty->isIntegerTy()) {
return Ty;
+ }
// The only other support type is pointer.
assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
- if (TD) return TD->getIntPtrType(getContext());
+
+ if (TD)
+ return TD->getIntPtrType(Ty);
// Without DataLayout, conservatively assume pointers are 64-bit.
return Type::getInt64Ty(getContext());
@@ -3101,15 +3088,26 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
Flags = setFlags(Flags, SCEV::FlagNUW);
if (OBO->hasNoSignedWrap())
Flags = setFlags(Flags, SCEV::FlagNSW);
- } else if (const GEPOperator *GEP =
- dyn_cast<GEPOperator>(BEValueV)) {
+ } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
// If the increment is an inbounds GEP, then we know the address
// space cannot be wrapped around. We cannot make any guarantee
// about signed or unsigned overflow because pointers are
// unsigned but we may have a negative index from the base
- // pointer.
- if (GEP->isInBounds())
+ // pointer. We can guarantee that no unsigned wrap occurs if the
+ // indices form a positive value.
+ if (GEP->isInBounds()) {
Flags = setFlags(Flags, SCEV::FlagNW);
+
+ const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
+ if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ }
+ } else if (const SubOperator *OBO =
+ dyn_cast<SubOperator>(BEValueV)) {
+ if (OBO->hasNoUnsignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ if (OBO->hasNoSignedWrap())
+ Flags = setFlags(Flags, SCEV::FlagNSW);
}
const SCEV *StartVal = getSCEV(StartValueV);
@@ -3177,18 +3175,18 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
/// operations. This allows them to be analyzed by regular SCEV code.
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+ Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+ Value *Base = GEP->getOperand(0);
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!Base->getType()->getPointerElementType()->isSized())
+ return getUnknown(GEP);
// Don't blindly transfer the inbounds flag from the GEP instruction to the
// Add expression, because the Instruction may be guarded by control flow
// and the no-overflow bits may not be valid for the expression in any
// context.
- bool isInBounds = GEP->isInBounds();
+ SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
- Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
- Value *Base = GEP->getOperand(0);
- // Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
- return getUnknown(GEP);
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
@@ -3199,21 +3197,19 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+ const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo);
// Add the field offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, FieldOffset);
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI);
const SCEV *IndexS = getSCEV(Index);
// Getelementptr indices are signed.
IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
// Multiply the index by the element size to compute the element offset.
- const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize,
- isInBounds ? SCEV::FlagNSW :
- SCEV::FlagAnyWrap);
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap);
// Add the element offset to the running total offset.
TotalOffset = getAddExpr(TotalOffset, LocalOffset);
@@ -3224,8 +3220,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
const SCEV *BaseS = getSCEV(Base);
// Add the total offset from all the GEP indices to the base.
- return getAddExpr(BaseS, TotalOffset,
- isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+ return getAddExpr(BaseS, TotalOffset, Wrap);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -4616,25 +4611,17 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,
if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_SLT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr);
- if (EL.hasAnyInfo()) return EL;
- break;
- }
- case ICmpInst::ICMP_SGT: {
- ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, true, IsSubExpr);
- if (EL.hasAnyInfo()) return EL;
- break;
- }
- case ICmpInst::ICMP_ULT: {
- ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr);
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_ULT: { // while (X < Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SLT;
+ ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
- case ICmpInst::ICMP_UGT: {
- ExitLimit EL = HowManyLessThans(getNotSCEV(LHS),
- getNotSCEV(RHS), L, false, IsSubExpr);
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_UGT: { // while (X > Y)
+ bool IsSigned = Cond == ICmpInst::ICMP_SGT;
+ ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr);
if (EL.hasAnyInfo()) return EL;
break;
}
@@ -5072,15 +5059,21 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
/// original value V is returned.
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if we've folded this expression at this loop before.
- std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
- std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
- if (!Pair.second)
- return Pair.first->second ? Pair.first->second : V;
-
+  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
+      ValuesAtScopes[V];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second ? Values[u].second : V;
+ }
+ Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0)));
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
- ValuesAtScopes[V][L] = C;
+  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 =
+      ValuesAtScopes[V];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = C;
+ break;
+ }
+ }
return C;
}
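This hunk (and the matching ones for LoopDispositions and BlockDispositions further down) trades a node-based std::map cache for a linear scan over a small vector of pairs, since most SCEVs are queried at only one or two scopes. A minimal generic sketch of the pattern in plain C++; getOrCompute is a made-up name, not LLVM API:

    #include <utility>
    #include <vector>

    // Find-or-compute memo cache: with one or two entries per key, a linear
    // scan over contiguous pairs beats a node-based map in speed and memory.
    template <typename K, typename V, typename Fn>
    V getOrCompute(std::vector<std::pair<K, V> > &Cache, const K &Key,
                   Fn Compute) {
      for (unsigned u = 0; u < Cache.size(); u++)
        if (Cache[u].first == Key)
          return Cache[u].second;
      Cache.push_back(std::make_pair(Key, V())); // placeholder breaks cycles
      V Result = Compute(Key);
      // Compute may recurse into this cache and reallocate the vector; that
      // is why the code above re-finds the slot instead of keeping a
      // reference across the call.
      for (unsigned u = Cache.size(); u > 0; u--)
        if (Cache[u - 1].first == Key) {
          Cache[u - 1].second = Result;
          break;
        }
      return Result;
    }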
@@ -5119,18 +5112,23 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
case scAddExpr: {
const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
- if (C->getType()->isPointerTy())
- C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ unsigned AS = PTy->getAddressSpace();
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
+ }
for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
if (!C2) return 0;
// First pointer!
if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ unsigned AS = C2->getType()->getPointerAddressSpace();
std::swap(C, C2);
+ Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
// The offsets have been converted to bytes. We can add bytes to an
// i8* by GEP with the byte count in the first index.
- C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+ C = ConstantExpr::getBitCast(C, DestPtrTy);
}
// Don't bother trying to sum two pointers. We probably can't
@@ -5138,8 +5136,8 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
if (C2->getType()->isPointerTy())
return 0;
- if (C->getType()->isPointerTy()) {
- if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+ if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
+ if (PTy->getElementType()->isStructTy())
C2 = ConstantExpr::getIntegerCast(
C2, Type::getInt32Ty(C->getContext()), true);
C = ConstantExpr::getGetElementPtr(C, C2);
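The two bitcast hunks above matter because a bitcast may not change a pointer's address space; the scratch i8* type has to live in the same address space as the constant being cast. Restated as a sketch, with addrspace(3) as an arbitrary example:

    // Before: always i8* in addrspace(0) -- invalid IR when C is a pointer
    // into another address space.
    //   C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
    // After: build the i8* type in the constant's own address space.
    unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
    Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); // e.g. i8 addrspace(3)*
    C = ConstantExpr::getBitCast(C, DestPtrTy);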
@@ -6336,45 +6334,72 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
return false;
}
-/// getBECount - Subtract the end and start values and divide by the step,
-/// rounding up, to get the number of times the backedge is executed. Return
-/// CouldNotCompute if an intermediate computation overflows.
-const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
- const SCEV *End,
- const SCEV *Step,
- bool NoWrap) {
- assert(!isKnownNegative(Step) &&
- "This code doesn't handle negative strides yet!");
-
- Type *Ty = Start->getType();
-
- // When Start == End, we have an exact BECount == 0. Short-circuit this case
- // here because SCEV may not be able to determine that the unsigned division
- // after rounding is zero.
- if (Start == End)
- return getConstant(Ty, 0);
-
- const SCEV *NegOne = getConstant(Ty, (uint64_t)-1);
- const SCEV *Diff = getMinusSCEV(End, Start);
- const SCEV *RoundUp = getAddExpr(Step, NegOne);
-
- // Add an adjustment to the difference between End and Start so that
- // the division will effectively round up.
- const SCEV *Add = getAddExpr(Diff, RoundUp);
-
- if (!NoWrap) {
- // Check Add for unsigned overflow.
- // TODO: More sophisticated things could be done here.
- Type *WideTy = IntegerType::get(getContext(),
- getTypeSizeInBits(Ty) + 1);
- const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy);
- const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy);
- const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp);
- if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
- return getCouldNotCompute();
+// Check whether a linear IV with a positive stride can overflow when used in a
+// less-than comparison, knowing the invariant term of the comparison, the
+// stride and the knowledge of NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MaxRHS = getSignedRange(RHS).getSignedMax();
+ APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).slt(MaxRHS);
+ }
+
+ APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax();
+ APInt MaxValue = APInt::getMaxValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow!
+ return (MaxValue - MaxStrideMinusOne).ult(MaxRHS);
+}
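A worked instance of the unsigned branch above, with the width fixed at 8 bits so the numbers are concrete; mayOverflowOnULT is a made-up helper mirroring the check, not LLVM API:

    #include <cassert>
    #include <cstdint>

    // (MaxValue - MaxStrideMinusOne).ult(MaxRHS), specialized to uint8_t with
    // an exactly-known stride: the last value of the IV that still satisfies
    // IV < RHS can be as large as MaxRHS - 1, and one more step of Stride
    // must not wrap past 255.
    static bool mayOverflowOnULT(uint8_t MaxRHS, uint8_t Stride) {
      return static_cast<uint8_t>(255 - (Stride - 1)) < MaxRHS;
    }

    int main() {
      assert(mayOverflowOnULT(250, 10));  // 255 - 9 = 246 < 250: may wrap
      assert(!mayOverflowOnULT(246, 10)); // 246 < 246 is false: cannot wrap
      return 0;
    }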
+
+// Check whether a linear IV with a negative stride can overflow when used in a
+// greater-than comparison, knowing the invariant term of the comparison,
+// the stride and the knowledge of NSW/NUW flags on the recurrence.
+bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
+ bool IsSigned, bool NoWrap) {
+ if (NoWrap) return false;
+
+ unsigned BitWidth = getTypeSizeInBits(RHS->getType());
+ const SCEV *One = getConstant(Stride->getType(), 1);
+
+ if (IsSigned) {
+ APInt MinRHS = getSignedRange(RHS).getSignedMin();
+ APInt MinValue = APInt::getSignedMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One))
+ .getSignedMax();
+
+ // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).sgt(MinRHS);
}
- return getUDivExpr(Add, Step);
+ APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin();
+ APInt MinValue = APInt::getMinValue(BitWidth);
+ APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One))
+ .getUnsignedMax();
+
+ // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow!
+ return (MinValue + MaxStrideMinusOne).ugt(MinRHS);
+}
+
+// Compute the backedge taken count from the interval difference, the
+// stride, and the presence of equality in the comparison.
+const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
+ bool Equality) {
+ const SCEV *One = getConstant(Step->getType(), 1);
+ Delta = Equality ? getAddExpr(Delta, Step)
+ : getAddExpr(Delta, getMinusSCEV(Step, One));
+ return getUDivExpr(Delta, Step);
}
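computeBECount is a ceiling division in disguise. A plain-integer sketch, assuming Delta and Step are small enough that the additions cannot overflow (which is exactly what the doesIVOverflow checks above establish):

    // IV <  bound: ceil(Delta / Step)  == (Delta + (Step - 1)) / Step
    // IV <= bound: one extra iteration == (Delta + Step) / Step
    unsigned computeBECountSketch(unsigned Delta, unsigned Step, bool Equality) {
      return Equality ? (Delta + Step) / Step : (Delta + (Step - 1)) / Step;
    }
    // e.g. Delta = 6, Step = 3: '<' gives (6 + 2) / 3 == 2, while '<=' gives
    // (6 + 3) / 3 == 3 -- the equality form buys one more iteration.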
/// HowManyLessThans - Return the number of times a backedge containing the
@@ -6386,119 +6411,144 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start,
/// a subexpression that cannot overflow before evaluating true.
ScalarEvolution::ExitLimit
ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned,
+ const Loop *L, bool IsSigned,
bool IsSubExpr) {
- // Only handle: "ADDREC < LoopInvariant".
- if (!isLoopInvariant(RHS, L)) return getCouldNotCompute();
+ // We handle only IV < Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!AddRec || AddRec->getLoop() != L)
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // Bail out unless LHS is an affine recurrence in this loop.
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
return getCouldNotCompute();
- // Check to see if we have a flag which makes analysis easy.
- bool NoWrap = false;
- if (!IsSubExpr) {
- NoWrap = AddRec->getNoWrapFlags(
- (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW))
- | SCEV::FlagNW));
- }
- if (AddRec->isAffine()) {
- unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
- const SCEV *Step = AddRec->getStepRecurrence(*this);
+ bool NoWrap = !IsSubExpr &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
- if (Step->isZero())
- return getCouldNotCompute();
- if (Step->isOne()) {
- // With unit stride, the iteration never steps past the limit value.
- } else if (isKnownPositive(Step)) {
- // Test whether a positive iteration can step past the limit
- // value and past the maximum value for its type in a single step.
- // Note that it's not sufficient to check NoWrap here, because even
- // though the value after a wrap is undefined, it's not undefined
- // behavior, so if wrap does occur, the loop could either terminate or
- // loop infinitely, but in either case, the loop is guaranteed to
- // iterate at least until the iteration where the wrapping occurs.
- const SCEV *One = getConstant(Step->getType(), 1);
- if (isSigned) {
- APInt Max = APInt::getSignedMaxValue(BitWidth);
- if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax())
- .slt(getSignedRange(RHS).getSignedMax()))
- return getCouldNotCompute();
- } else {
- APInt Max = APInt::getMaxValue(BitWidth);
- if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax())
- .ult(getUnsignedRange(RHS).getUnsignedMax()))
- return getCouldNotCompute();
- }
- } else
- // TODO: Handle negative strides here and below.
- return getCouldNotCompute();
+ const SCEV *Stride = IV->getStepRecurrence(*this);
- // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant
- // m. So, we count the number of iterations in which {n,+,s} < m is true.
- // Note that we cannot simply return max(m-n,0)/s because it's not safe to
- // treat m-n as signed nor unsigned due to overflow possibility.
-
- // First, we get the value of the LHS in the first iteration: n
- const SCEV *Start = AddRec->getOperand(0);
-
- // Determine the minimum constant start value.
- const SCEV *MinStart = getConstant(isSigned ?
- getSignedRange(Start).getSignedMin() :
- getUnsignedRange(Start).getUnsignedMin());
-
- // If we know that the condition is true in order to enter the loop,
- // then we know that it will run exactly (m-n)/s times. Otherwise, we
- // only know that it will execute (max(m,n)-n)/s times. In both cases,
- // the division must round up.
- const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L,
- isSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT,
- getMinusSCEV(Start, Step), RHS))
- End = isSigned ? getSMaxExpr(RHS, Start)
- : getUMaxExpr(RHS, Start);
-
- // Determine the maximum constant end value.
- const SCEV *MaxEnd = getConstant(isSigned ?
- getSignedRange(End).getSignedMax() :
- getUnsignedRange(End).getUnsignedMax());
-
- // If MaxEnd is within a step of the maximum integer value in its type,
- // adjust it down to the minimum value which would produce the same effect.
- // This allows the subsequent ceiling division of (N+(step-1))/step to
- // compute the correct value.
- const SCEV *StepMinusOne = getMinusSCEV(Step,
- getConstant(Step->getType(), 1));
- MaxEnd = isSigned ?
- getSMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)),
- StepMinusOne)) :
- getUMinExpr(MaxEnd,
- getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)),
- StepMinusOne));
-
- // Finally, we subtract these two values and divide, rounding up, to get
- // the number of times the backedge is executed.
- const SCEV *BECount = getBECount(Start, End, Step, NoWrap);
-
- // The maximum backedge count is similar, except using the minimum start
- // value and the maximum end value.
- // If we already have an exact constant BECount, use it instead.
- const SCEV *MaxBECount = isa<SCEVConstant>(BECount) ? BECount
- : getBECount(MinStart, MaxEnd, Step, NoWrap);
-
- // If the stride is nonconstant, and NoWrap == true, then
- // getBECount(MinStart, MaxEnd) may not compute. This would result in an
- // exact BECount and invalid MaxBECount, which should be avoided to catch
- // more optimization opportunities.
- if (isa<SCEVCouldNotCompute>(MaxBECount))
- MaxBECount = BECount;
-
- return ExitLimit(BECount, MaxBECount);
- }
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
- return getCouldNotCompute();
+  // Avoid proven overflow cases: this will ensure that the backedge taken
+  // count will not generate any unsigned overflow. Relaxed no-overflow
+  // conditions exploit NoWrapFlags, allowing us to optimize in the presence
+  // of undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
+ : ICmpInst::ICMP_ULT;
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ End = IsSigned ? getSMaxExpr(RHS, Start)
+ : getUMaxExpr(RHS, Start);
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
+ : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+  // Although End can be a MAX expression, we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
+}
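A concrete illustration of why End is clamped to the max of RHS and Start when loop entry is not guarded, using 8-bit unsigned values and a stride of 1; backedgeTaken is a made-up helper mirroring the formula above, not the LLVM API:

    #include <cstdint>

    // For IV = {s,+,1} tested as IV < n: if entry is not guaranteed and
    // s >= n, naively computing n - s wraps to a huge count; clamping End to
    // umax(n, s) makes End - Start == 0, the correct count for a skipped loop.
    static uint8_t backedgeTaken(uint8_t s, uint8_t n) {
      uint8_t End = n > s ? n : s; // umax(RHS, Start)
      return End - s;              // computeBECount with Stride == 1
    }
    // backedgeTaken(10, 3) == 0 (never entered); backedgeTaken(0, 5) == 5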
+
+ScalarEvolution::ExitLimit
+ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
+ const Loop *L, bool IsSigned,
+ bool IsSubExpr) {
+ // We handle only IV > Invariant
+ if (!isLoopInvariant(RHS, L))
+ return getCouldNotCompute();
+
+ const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
+
+  // Bail out unless LHS is an affine recurrence in this loop.
+ if (!IV || IV->getLoop() != L || !IV->isAffine())
+ return getCouldNotCompute();
+
+ bool NoWrap = !IsSubExpr &&
+ IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW);
+
+ const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
+
+ // Avoid negative or zero stride values
+ if (!isKnownPositive(Stride))
+ return getCouldNotCompute();
+
+  // Avoid proven overflow cases: this will ensure that the backedge taken
+  // count will not generate any unsigned overflow. Relaxed no-overflow
+  // conditions exploit NoWrapFlags, allowing us to optimize in the presence
+  // of undefined behavior, as in C.
+ if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap))
+ return getCouldNotCompute();
+
+ ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT
+ : ICmpInst::ICMP_UGT;
+
+ const SCEV *Start = IV->getStart();
+ const SCEV *End = RHS;
+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
+ End = IsSigned ? getSMinExpr(RHS, Start)
+ : getUMinExpr(RHS, Start);
+
+ const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false);
+
+ APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax()
+ : getUnsignedRange(Start).getUnsignedMax();
+
+ APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
+ APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1)
+ : APInt::getMinValue(BitWidth) + (MinStride - 1);
+
+  // Although End can be a MIN expression, we estimate MinEnd considering only
+ // the case End = RHS. This is safe because in the other case (Start - End)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MinEnd =
+ IsSigned ? APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit)
+ : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit);
+
+ const SCEV *MaxBECount = getCouldNotCompute();
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else
+ MaxBECount = computeBECount(getConstant(MaxStart - MinEnd),
+ getConstant(MinStride), false);
+
+ if (isa<SCEVCouldNotCompute>(MaxBECount))
+ MaxBECount = BECount;
+
+ return ExitLimit(BECount, MaxBECount);
}
/// getNumIterationsInRange - Return the number of iterations of this loop that
@@ -6627,7 +6677,534 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
return SE.getCouldNotCompute();
}
+static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue().abs();
+ APInt B = C2->getValue()->getValue().abs();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.zext(ABW);
+ else if (ABW < BBW)
+ A = A.zext(BBW);
+
+ return APIntOps::GreatestCommonDivisor(A, B);
+}
+
+static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::srem(A, B);
+}
+
+static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
+ APInt A = C1->getValue()->getValue();
+ APInt B = C2->getValue()->getValue();
+ uint32_t ABW = A.getBitWidth();
+ uint32_t BBW = B.getBitWidth();
+
+ if (ABW > BBW)
+ B = B.sext(ABW);
+ else if (ABW < BBW)
+ A = A.sext(BBW);
+
+ return APIntOps::sdiv(A, B);
+}
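These three helpers exist because APInt arithmetic asserts that both operands share one bit width: gcd works on absolute values and zero-extends, while the signed rem/div sign-extend. A small usage sketch on raw APInts, assuming only llvm/ADT/APInt.h:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    APInt A(32, 12); // 32-bit 12
    APInt B(64, 18); // 64-bit 18
    // Widen the narrower operand first, then take the common divisor.
    APInt G = APIntOps::GreatestCommonDivisor(A.zext(64), B); // == 6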
+
+namespace {
+struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
+public:
+  // Pattern match Step into Start. When Step is a multiply expression, find
+  // the largest subexpression of Step that appears in Start. When Start is an
+  // add expression, try to match Step in the subexpressions of Start; the
+  // non-matching subexpressions are returned under Remainder.
+ static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
+ const SCEV *Step, const SCEV **Remainder) {
+ assert(Remainder && "Remainder should not be NULL");
+ SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
+ const SCEV *Res = R.visit(Start);
+ *Remainder = R.Remainder;
+ return Res;
+ }
+
+ SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
+ : SE(S), GCD(G), Remainder(R) {
+ Zero = SE.getConstant(GCD->getType(), 0);
+ One = SE.getConstant(GCD->getType(), 1);
+ }
+
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ if (GCD == Constant || Constant == Zero)
+ return GCD;
+
+ if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
+ const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
+ if (Res != One)
+ return Res;
+
+ Remainder = SE.getConstant(srem(Constant, CGCD));
+ Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
+ Res = SE.getConstant(gcd(Constant, CGCD));
+ return Res;
+ }
+
+ // When GCD is not a constant, it could be that the GCD is an Add, Mul,
+ // AddRec, etc., in which case we want to find out how many times the
+ // Constant divides the GCD: we then return that as the new GCD.
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
+
+ if (Res == One || Rem != Zero) {
+ Remainder = Constant;
+ return One;
+ }
+
+ assert(isa<SCEVConstant>(Res) && "Res should be a constant");
+ Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
+ return Res;
+ }
+
+ const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
+
+ // FIXME: There may be ambiguous situations: for instance,
+ // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
+ // The order in which the AddExpr is traversed computes a different GCD
+ // and Remainder.
+ if (Res != One)
+ GCD = Res;
+ if (Rem != Zero)
+ Remainder = SE.getAddExpr(Remainder, Rem);
+ }
+
+ return GCD;
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (Expr->getOperand(i) == GCD)
+ return GCD;
+ }
+
+ // If we have not returned yet, it means that GCD is not part of Expr.
+ const SCEV *PartialGCD = One;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+ if (Rem != Zero)
+ // GCD does not divide Expr->getOperand(i).
+ continue;
+
+ if (Res == GCD)
+ return GCD;
+ PartialGCD = SE.getMulExpr(PartialGCD, Res);
+ if (PartialGCD == GCD)
+ return GCD;
+ }
+
+ if (PartialGCD != One)
+ return PartialGCD;
+
+ Remainder = Expr;
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
+ if (!Mul)
+ return PartialGCD;
+
+ // When the GCD is a multiply expression, try to decompose it:
+ // this occurs when Step does not divide the Start expression
+ // as in: {(-4 + (3 * %m)),+,(2 * %m)}
+ for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
+ if (Rem == Zero) {
+ Remainder = Rem;
+ return Res;
+ }
+ }
+
+ return PartialGCD;
+ }
+
+ const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (GCD == Expr)
+ return GCD;
+
+ if (!Expr->isAffine()) {
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *Rem = Zero;
+ const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
+ if (Rem != Zero)
+ Remainder = SE.getAddExpr(Remainder, Rem);
+
+ Rem = Zero;
+ Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
+ if (Rem != Zero) {
+ Remainder = Expr;
+ return GCD;
+ }
+
+ return Res;
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (GCD != Expr)
+ Remainder = Expr;
+ return GCD;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return One;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *GCD, *Remainder, *Zero, *One;
+};
+
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
+public:
+ // Remove from Start all multiples of Step.
+ static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
+ const SCEV *Step) {
+ SCEVDivision D(SE, Step);
+ const SCEV *Rem = D.Zero;
+ (void)Rem;
+ // The division is guaranteed to succeed: Step should divide Start with no
+ // remainder.
+ assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
+ "Step should divide Start with no remainder.");
+ return D.visit(Start);
+ }
+
+ SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
+ Zero = SE.getConstant(GCD->getType(), 0);
+ One = SE.getConstant(GCD->getType(), 1);
+ }
+
+ const SCEV *visitConstant(const SCEVConstant *Constant) {
+ if (GCD == Constant)
+ return One;
+
+ if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
+ return SE.getConstant(sdiv(Constant, CGCD));
+ return Constant;
+ }
+
+ const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ SmallVector<const SCEV *, 2> Operands;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+
+ if (Operands.size() == 1)
+ return Operands[0];
+ return SE.getAddExpr(Operands);
+ }
+
+ const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ bool FoundGCDTerm = false;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+ if (Expr->getOperand(i) == GCD)
+ FoundGCDTerm = true;
+
+ SmallVector<const SCEV *, 2> Operands;
+ if (FoundGCDTerm) {
+ FoundGCDTerm = false;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (FoundGCDTerm)
+ Operands.push_back(Expr->getOperand(i));
+ else if (Expr->getOperand(i) == GCD)
+ FoundGCDTerm = true;
+ else
+ Operands.push_back(Expr->getOperand(i));
+ }
+ } else {
+ FoundGCDTerm = false;
+ const SCEV *PartialGCD = One;
+ for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
+ if (PartialGCD == GCD) {
+ Operands.push_back(Expr->getOperand(i));
+ continue;
+ }
+
+ const SCEV *Rem = Zero;
+ const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
+ if (Rem == Zero) {
+ PartialGCD = SE.getMulExpr(PartialGCD, Res);
+ Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+ } else {
+ Operands.push_back(Expr->getOperand(i));
+ }
+ }
+ }
+
+ if (Operands.size() == 1)
+ return Operands[0];
+ return SE.getMulExpr(Operands);
+ }
+
+ const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+
+ assert(Expr->isAffine() && "Expr should be affine");
+
+ const SCEV *Start = divide(SE, Expr->getStart(), GCD);
+ const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
+
+ return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
+ Expr->getNoWrapFlags());
+ }
+
+ const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+ if (GCD == Expr)
+ return One;
+ return Expr;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
+ return Expr;
+ }
+
+private:
+ ScalarEvolution &SE;
+ const SCEV *GCD, *Zero, *One;
+};
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access. Returns the remainder of the delinearization,
+/// which is the start offset of the array. The SCEV->delinearize algorithm
+/// computes the multiples of SCEV coefficients: that is, a pattern matching of
+/// subexpressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride. When
+/// SCEV->delinearize fails, it returns the SCEV unchanged.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+/// void foo(long n, long m, long o, double A[n][m][o]) {
+///
+/// for (long i = 0; i < n; i++)
+/// for (long j = 0; j < m; j++)
+/// for (long k = 0; k < o; k++)
+/// A[i][j][k] = 1.0;
+/// }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that is not divisible by any of the strides
+/// in the loops:
+///
+/// CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array, since these are the multiples by which the strides grow:
+///
+/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+///
+/// Note that the outermost dimension remains UnknownSize because there are
+/// no strides that would help identify the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the
+/// known dimensions: %m * %o * 8.
+///
+/// Finally, delinearize provides the access functions for the array reference
+/// that corresponds to A[i][j][k] in the C testcase above:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases check the output of a function pass, DelinearizationPass,
+/// which walks through all loads and stores of a function, asks for the SCEV
+/// of each memory access with respect to all enclosing loops, calls
+/// SCEV->delinearize on that, and prints the results.
+const SCEV *
+SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) const {
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (!this->isAffine())
+ return this;
+
+ const SCEV *Start = this->getStart();
+ const SCEV *Step = this->getStepRecurrence(SE);
+
+  // Build the SCEV representation of the canonical induction variable in the
+ // loop of this SCEV.
+ const SCEV *Zero = SE.getConstant(this->getType(), 0);
+ const SCEV *One = SE.getConstant(this->getType(), 1);
+ const SCEV *IV =
+ SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
+
+ DEBUG(dbgs() << "(delinearize: " << *this << "\n");
+
+  // Currently we fail to delinearize when the stride of this SCEV is 1. We
+  // could decide not to fail in this case: we could just return 1 for the size
+  // of the subscript, and this same SCEV for the access function.
+ if (Step == One) {
+ DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+ return this;
+ }
+
+ // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
+ const SCEV *Remainder = NULL;
+ const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
+
+ DEBUG(dbgs() << "GCD: " << *GCD << "\n");
+ DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
+
+ // Same remark as above: we currently fail the delinearization, although we
+ // can very well handle this special case.
+ if (GCD == One) {
+ DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+ return this;
+ }
+
+ // As findGCD computed Remainder, GCD divides "Start - Remainder." The
+ // Quotient is then this SCEV without Remainder, scaled down by the GCD. The
+ // Quotient is what will be used in the next subscript delinearization.
+ const SCEV *Quotient =
+ SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
+ DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
+
+ const SCEV *Rem;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
+ // Recursively call delinearize on the Quotient until there are no more
+ // multiples that can be recognized.
+ Rem = AR->delinearize(SE, Subscripts, Sizes);
+ else
+ Rem = Quotient;
+
+  // Scale up the canonical induction variable IV by whatever remains from the
+  // Step after division by the GCD: the GCD is the size of the entire sub-array.
+ if (Step != GCD) {
+ Step = SCEVDivision::divide(SE, Step, GCD);
+ IV = SE.getMulExpr(IV, Step);
+ }
+  // The access function in the current subscript is computed as the canonical
+ // induction variable IV (potentially scaled up by the step) and offset by
+ // Rem, the offset of delinearization in the sub-array.
+ const SCEV *Index = SE.getAddExpr(IV, Rem);
+
+ // Record the access function and the size of the current subscript.
+ Subscripts.push_back(Index);
+ Sizes.push_back(GCD);
+
+#ifndef NDEBUG
+ int Size = Sizes.size();
+ DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
+ DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
+ for (int i = 0; i < Size - 1; i++)
+ DEBUG(dbgs() << "[" << *Sizes[i] << "]");
+ DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
+
+ DEBUG(dbgs() << "ArrayRef");
+ for (int i = 0; i < Size; i++)
+ DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
+ DEBUG(dbgs() << "\n)\n");
+#endif
+
+ return Remainder;
+}
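A hypothetical caller, following the DelinearizationPass description in the comment above; SE and AccessFn are assumed to already hold the ScalarEvolution analysis and the SCEV of a memory access:

    SmallVector<const SCEV *, 3> Subscripts, Sizes;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn)) {
      const SCEV *BaseOffset = AR->delinearize(SE, Subscripts, Sizes);
      // For the A[i][j][k] example above: BaseOffset is %A, Subscripts holds
      // the three {0,+,1} induction variables, and Sizes holds {%m, %o, 8}.
    }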
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
@@ -6683,7 +7260,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), FirstUnknown(0) {
+  : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
+    BlockDispositions(64), FirstUnknown(0) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
@@ -6821,14 +7398,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
- std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
- std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
- Values.insert(std::make_pair(L, LoopVariant));
- if (!Pair.second)
- return Pair.first->second;
-
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values =
+      LoopDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == L)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(L, LoopVariant));
LoopDisposition D = computeLoopDisposition(S, L);
- return LoopDispositions[S][L] = D;
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 =
+      LoopDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == L) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::LoopDisposition
@@ -6920,14 +7504,21 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
- std::map<const BasicBlock *, BlockDisposition> &Values = BlockDispositions[S];
- std::pair<std::map<const BasicBlock *, BlockDisposition>::iterator, bool>
- Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock));
- if (!Pair.second)
- return Pair.first->second;
-
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values =
+      BlockDispositions[S];
+ for (unsigned u = 0; u < Values.size(); u++) {
+ if (Values[u].first == BB)
+ return Values[u].second;
+ }
+ Values.push_back(std::make_pair(BB, DoesNotDominateBlock));
BlockDisposition D = computeBlockDisposition(S, BB);
- return BlockDispositions[S][BB] = D;
+  SmallVector<std::pair<const BasicBlock *, BlockDisposition>, 2> &Values2 =
+      BlockDispositions[S];
+ for (unsigned u = Values2.size(); u > 0; u--) {
+ if (Values2[u - 1].first == BB) {
+ Values2[u - 1].second = D;
+ break;
+ }
+ }
+ return D;
}
ScalarEvolution::BlockDisposition
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c434b40..86a557b 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -176,8 +177,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
+ BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -191,13 +192,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
// If we haven't found this binop, insert it.
Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
- BO->setDebugLoc(SaveInsertPt->getDebugLoc());
+ BO->setDebugLoc(Loc);
rememberInstruction(BO);
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
return BO;
}
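The BuilderType::InsertPointGuard introduced above (and reused throughout the remaining hunks in this file) is an RAII helper from IRBuilder. Roughly, as a sketch rather than the real definition (the real guard also saves the debug location):

    class InsertPointGuardSketch {
      IRBuilderBase &B;
      BasicBlock *Block;
      BasicBlock::iterator Point;
    public:
      InsertPointGuardSketch(IRBuilderBase &B)
          : B(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()) {}
      // Restores on every exit path, so the early returns in the functions
      // below no longer need manual restoreInsertPoint calls.
      ~InsertPointGuardSketch() { B.SetInsertPoint(Block, Point); }
    };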
@@ -406,6 +403,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
+ Type *IntPtrTy = SE.TD
+ ? SE.TD->getIntPtrType(PTy)
+ : Type::getInt64Ty(PTy->getContext());
+
// Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
// indexes into the array implied by the pointer operand; the rest of
@@ -416,7 +417,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// array indexing.
SmallVector<const SCEV *, 8> ScaledOps;
if (ElTy->isSized()) {
- const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
+ const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
if (!ElSize->isZero()) {
SmallVector<const SCEV *, 8> NewOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -548,8 +549,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -565,16 +565,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
rememberInstruction(GEP);
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
return GEP;
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
// Move the insertion point out of as many loops as we can.
while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
@@ -610,8 +605,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
rememberInstruction(GEP);
// Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Builder.restoreIP(SaveInsertPt);
return expand(SE.getAddExpr(Ops));
}
@@ -1076,8 +1070,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
// Save the original insertion point so we can restore it when we're done.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
// Another AddRec may need to be recursively expanded below. For example, if
// this AddRec is quadratic, the StepV may itself be an AddRec in this
@@ -1144,10 +1137,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
PN->addIncoming(IncV, Pred);
}
- // Restore the original insert point.
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
-
// After expanding subexpressions, restore the PostIncLoops set so the caller
// can ensure that IVIncrement dominates the current uses.
PostIncLoops = SavedPostIncLoops;
@@ -1232,19 +1221,19 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
!ExpandTy->isPointerTy() && Step->isNonConstantNegative();
if (useSubtract)
Step = SE.getNegativeSCEV(Step);
- // Expand the step somewhere that dominates the loop header.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
- Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
- // Restore the insertion point to the place where the caller has
- // determined dominates all uses.
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Value *StepV;
+ {
+ // Expand the step somewhere that dominates the loop header.
+ BuilderType::InsertPointGuard Guard(Builder);
+ StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ }
Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
}
}
// Re-apply any non-loop-dominating scale.
if (PostLoopScale) {
+ assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
Result = InsertNoopCastOfTo(Result, IntTy);
Result = Builder.CreateMul(Result,
expandCodeFor(PostLoopScale, IntTy));
@@ -1289,16 +1278,14 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
S->getNoWrapFlags(SCEV::FlagNW)));
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
BasicBlock::iterator NewInsertPt =
llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
+ BuilderType::InsertPointGuard Guard(Builder);
while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
isa<LandingPadInst>(NewInsertPt))
++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1342,9 +1329,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Header->begin());
rememberInstruction(CanonicalIV);
+ SmallSet<BasicBlock *, 4> PredSeen;
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
BasicBlock *HP = *HPI;
+ if (!PredSeen.insert(HP))
+ continue;
+
if (L->contains(HP)) {
// Insert a unit add instruction right before the terminator
// corresponding to the back-edge.
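
The PredSeen set added above deduplicates loop-header predecessors: pred_iterator yields one visit per incoming CFG edge, so a block that branches to the header through several switch cases shows up repeatedly, and adding one unit increment per visit would be wrong. A small illustration of edges versus distinct blocks, under the 3.4-era API where SmallSet::insert returns a bool:

  #include "llvm/ADT/SmallSet.h"
  #include "llvm/Support/CFG.h"
  using namespace llvm;

  // Hypothetical count of incoming edges versus unique predecessor blocks.
  static void countPreds(BasicBlock *BB, unsigned &Edges, unsigned &Blocks) {
    SmallSet<BasicBlock *, 4> Seen;
    Edges = Blocks = 0;
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      ++Edges;               // one per incoming CFG edge
      if (Seen.insert(*PI))  // true only on the first visit of a block
        ++Blocks;            // one per distinct predecessor block
    }
  }

A switch with two cases targeting BB contributes two edges but one block; the continue above skips exactly the second visit.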
@@ -1527,8 +1518,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (I != InsertedExpressions.end())
return I->second;
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
// Expand the expression into instructions.
@@ -1541,8 +1531,6 @@ Value *SCEVExpander::expand(const SCEV *S) {
// a postinc expansion, it could be reused by a non postinc user, but only if
// its insertion point was already at the head of the loop.
InsertedExpressions[std::make_pair(S, InsertPt)] = V;
-
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1553,10 +1541,6 @@ void SCEVExpander::rememberInstruction(Value *I) {
InsertedValues.insert(I);
}
-void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
- Builder.SetInsertPoint(BB, I);
-}
-
/// getOrInsertCanonicalInductionVariable - This method returns the
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
@@ -1572,11 +1556,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
// Emit code for it.
- BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
- BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ BuilderType::InsertPointGuard Guard(Builder);
PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
- if (SaveInsertBB)
- restoreInsertPoint(SaveInsertBB, SaveInsertPt);
return V;
}
@@ -1724,28 +1705,43 @@ namespace {
// Currently, we only allow division by a nonzero constant here. If this is
// inadequate, we could easily allow division by SCEVUnknown by using
// ValueTracking to check isKnownNonZero().
+//
+// We cannot generally expand recurrences unless the step dominates the loop
+// header. The expander handles the special case of affine recurrences by
+// scaling the recurrence outside the loop, but this technique isn't generally
+// applicable. Expanding a nested recurrence outside a loop requires computing
+// binomial coefficients. This could be done, but the recurrence has to be in a
+// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
+ ScalarEvolution &SE;
bool IsUnsafe;
- SCEVFindUnsafe(): IsUnsafe(false) {}
+ SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
bool follow(const SCEV *S) {
- const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
- if (!D)
- return true;
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
- if (SC && !SC->getValue()->isZero())
- return true;
- IsUnsafe = true;
- return false;
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
+ if (!SC || SC->getValue()->isZero()) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ const SCEV *Step = AR->getStepRecurrence(SE);
+ if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
+ IsUnsafe = true;
+ return false;
+ }
+ }
+ return true;
}
bool isDone() const { return IsUnsafe; }
};
}
namespace llvm {
-bool isSafeToExpand(const SCEV *S) {
- SCEVFindUnsafe Search;
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
+ SCEVFindUnsafe Search(SE);
visitAll(S, Search);
return !Search.IsUnsafe;
}
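
With ScalarEvolution threaded through, callers can vet an expression before building an expander. A usage sketch, assuming the 3.4-era SCEVExpander constructor taking (ScalarEvolution &, const char *):

  #include "llvm/Analysis/ScalarEvolutionExpander.h"
  using namespace llvm;

  // Hypothetical caller: expand S at InsertPt only when it contains no
  // division by zero or by a non-constant, and no non-affine recurrence
  // whose step fails to dominate its loop header.
  static Value *tryExpand(const SCEV *S, ScalarEvolution &SE,
                          Instruction *InsertPt) {
    if (!isSafeToExpand(S, SE))
      return 0;
    SCEVExpander Expander(SE, "scevexp");
    return Expander.expandCodeFor(S, S->getType(), InsertPt);
  }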
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index dd2ed4f..f110616 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -119,11 +119,19 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
switch (Kind) {
case NormalizeAutodetect:
- if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
- const SCEV *TransformedStep =
- TransformSubExpr(AR->getStepRecurrence(SE),
- User, OperandValToReplace);
- Result = SE.getMinusSCEV(Result, TransformedStep);
+ // Normalize this SCEV by subtracting the expression for the final step.
+ // We only allow affine AddRecs to be normalized, otherwise we would not
+ // be able to correctly denormalize.
+ // e.g. {1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2}
+ // Normalized form: {-2,+,1,+,2}
+ // Denormalized form: {1,+,3,+,2}
+ //
+// However, denormalization would use a different step expression than
+ // normalization (see getPostIncExpr), generating the wrong final
+ // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2}
+ if (AR->isAffine() &&
+ IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
Loops.insert(L);
}
#if 0
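
The example in the comment checks out numerically. {1,+,3,+,2} takes the values 1, 4, 9, 16 (first differences 3, 5, 7, i.e. {3,+,2}); the normalized form {-2,+,1,+,2} takes -2, -1, 2, 7. Denormalizing with the same step {3,+,2} adds 3, 5, 7, 9 and recovers 1, 4, 9, 16, but getPostIncExpr would add {1,+,2}, i.e. 1, 3, 5, 7, yielding -1, 2, 7, 14 = {-1,+,3,+,2} (componentwise, {-2,+,1,+,2} + {1,+,2} = {-1,+,3,+,2}). Only for affine recurrences do the two step expressions coincide, hence the isAffine() guard.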
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 4ad7162..0353295 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -96,6 +96,11 @@ bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
return PrevTTI->isLoweredToCall(F);
}
+void TargetTransformInfo::getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const {
+ PrevTTI->getUnrollingPreferences(L, UP);
+}
+
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return PrevTTI->isLegalAddImmediate(Imm);
}
@@ -145,6 +150,10 @@ TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
return PrevTTI->getPopcntSupport(IntTyWidthInBit);
}
+bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
+ return PrevTTI->haveFastSqrt(Ty);
+}
+
unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
return PrevTTI->getIntImmCost(Imm, Ty);
}
@@ -215,6 +224,11 @@ unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp,
return PrevTTI->getAddressComputationCost(Tp, IsComplex);
}
+unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty,
+ bool IsPairwise) const {
+ return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise);
+}
+
namespace {
struct NoTTI : ImmutablePass, TargetTransformInfo {
@@ -265,26 +279,34 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
// Otherwise, the default basic cost is used.
return TCC_Basic;
- case Instruction::IntToPtr:
+ case Instruction::IntToPtr: {
+ if (!DL)
+ return TCC_Basic;
+
// An inttoptr cast is free so long as the input is a legal integer type
// which doesn't contain values outside the range of a pointer.
- if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) &&
- OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits())
+ unsigned OpSize = OpTy->getScalarSizeInBits();
+ if (DL->isLegalInteger(OpSize) &&
+ OpSize <= DL->getPointerTypeSizeInBits(Ty))
return TCC_Free;
// Otherwise it's not a no-op.
return TCC_Basic;
+ }
+ case Instruction::PtrToInt: {
+ if (!DL)
+ return TCC_Basic;
- case Instruction::PtrToInt:
// A ptrtoint cast is free so long as the result is large enough to store
// the pointer, and a legal integer type.
- if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) &&
- Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits())
+ unsigned DestSize = Ty->getScalarSizeInBits();
+ if (DL->isLegalInteger(DestSize) &&
+ DestSize >= DL->getPointerTypeSizeInBits(OpTy))
return TCC_Free;
// Otherwise it's not a no-op.
return TCC_Basic;
-
+ }
case Instruction::Trunc:
// trunc to a native type is free (assuming the target has compare and
// shift-right of the same width).
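
The rewritten cost logic keys the free-cast checks to the pointer width of the specific address space via getPointerTypeSizeInBits. A sketch of just the inttoptr predicate, assuming a hypothetical target whose address space 1 uses 32-bit pointers alongside 64-bit default pointers:

  #include "llvm/IR/DataLayout.h"
  using namespace llvm;

  // Free iff the source integer is legal and no wider than pointers in
  // the *destination* pointer type's address space.
  static bool intToPtrIsFree(const DataLayout &DL, Type *OpTy, Type *Ty) {
    unsigned OpSize = OpTy->getScalarSizeInBits();
    return DL.isLegalInteger(OpSize) &&
           OpSize <= DL.getPointerTypeSizeInBits(Ty);
  }

With the old default-address-space getPointerSizeInBits(), an i64-to-addrspace(1)-pointer cast on such a target would wrongly count as free even though a 64-bit value cannot round-trip through a 32-bit pointer.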
@@ -457,6 +479,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return true;
}
+ void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { }
+
bool isLegalAddImmediate(int64_t Imm) const {
return false;
}
@@ -505,6 +529,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
return PSK_Software;
}
+ bool haveFastSqrt(Type *Ty) const {
+ return false;
+ }
+
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const {
return 1;
}
@@ -569,6 +597,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
unsigned getAddressComputationCost(Type *Tp, bool) const {
return 0;
}
+
+ unsigned getReductionCost(unsigned, Type *, bool) const {
+ return 1;
+ }
};
} // end anonymous namespace
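
Each hook added in this patch follows the same two-part recipe: a forwarding stub on the analysis-group chain (delegating to PrevTTI) plus a conservative default in NoTTI, so a target overrides only what it knows. A sketch of such an override; the struct name is illustrative, pass boilerplate (ID, registration) is omitted, and the only assumed field is UnrollingPreferences::Threshold from the in-tree header:

  // Hypothetical target TTI, 3.4-style (plain virtual methods).
  struct MyTargetTTI : ImmutablePass, TargetTransformInfo {
    virtual void getUnrollingPreferences(Loop *L,
                                         UnrollingPreferences &UP) const {
      UP.Threshold = 200; // this target tolerates more unrolled code
    }
    virtual bool haveFastSqrt(Type *Ty) const {
      return Ty->isFloatTy(); // fast single-precision sqrt only
    }
    virtual unsigned getReductionCost(unsigned Opcode, Type *Ty,
                                      bool IsPairwise) const {
      return IsPairwise ? 2 : 1; // pairwise shuffles cost extra here
    }
  };

Anything not overridden keeps flowing down the chain to NoTTI's defaults above: no unrolling preference changes, no fast sqrt, unit reduction cost.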
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index bbf3c3a..6791d4b 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -16,7 +16,12 @@
// typical C/C++ TBAA, but it can also be used to implement custom alias
// analysis behavior for other languages.
//
-// The current metadata format is very simple. TBAA MDNodes have up to
+// Two metadata formats are supported: scalar TBAA and struct-path aware
+// TBAA. Once all test cases are upgraded to struct-path aware TBAA and
+// existing bc files can be auto-upgraded, support for scalar TBAA can be
+// dropped.
+//
+// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
// !0 = metadata !{ metadata !"an example type tree" }
// !1 = metadata !{ metadata !"int", metadata !0 }
@@ -40,6 +45,65 @@
// should return true; see
// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
//
+// With struct-path aware TBAA, the MDNodes attached to an instruction using
+// "!tbaa" are called path tag nodes.
+//
+// A path tag node has four fields; the last field is optional.
+//
+// The first field is the base type node, which can be a struct type node
+// or a scalar type node. The second field is the access type node, which
+// must be a scalar type node. The third field is the offset into the base type.
+// The last field has the same meaning as the last field of our scalar TBAA:
+// it's an integer which, if equal to 1, indicates that the access is "constant".
+//
+// The struct type node has a name and a list of pairs, one pair for each member
+// of the struct. The first element of each pair is a type node (a struct type
+// node or a scalar type node), specifying the type of the member; the second
+// element of each pair is the offset of the member.
+//
+// Given this example:
+// typedef struct {
+// short s;
+// } A;
+// typedef struct {
+// uint16_t s;
+// A a;
+// } B;
+//
+// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
+// instruction. The base type is !4 (struct B), the access type is !2 (scalar
+// type short) and the offset is 4.
+//
+// !0 = metadata !{metadata !"Simple C/C++ TBAA"}
+// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
+// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node
+// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node
+// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
+// // Struct type node
+// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node
+//
+// The struct type nodes and the scalar type nodes form a type DAG.
+// Root (!0)
+// char (!1) -- edge to Root
+// short (!2) -- edge to char
+// A (!3) -- edge with offset 0 to short
+// B (!4) -- edge with offset 0 to short and edge with offset 4 to A
+//
+// To check if two tags (tagX and tagY) can alias, we start from the base type
+// of tagX, follow the edge with the correct offset in the type DAG and adjust
+// the offset until we reach the base type of tagY or until we reach the Root
+// node.
+// If we reach the base type of tagY, compare the adjusted offset with the
+// offset of tagY: return Alias if the offsets are equal, and NoAlias
+// otherwise.
+// If we reach the Root node, repeat the walk starting from the base type of
+// tagY to see if we reach the base type of tagX.
+//
+// If they have different roots, they're part of different potentially
+// unrelated type systems, so we return Alias to be conservative.
+// If neither node is an ancestor of the other and they have the same root,
+// then we say NoAlias.
+//
// TODO: The current metadata format doesn't support struct
// fields. For example:
// struct X {
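
The walk described in the comment above is short to write down. A simplified one-directional sketch reusing the TBAAStructTagNode/TBAAStructTypeNode wrapper names from this file (the real logic, including the symmetric second walk after hitting the root, lives in PathAliases):

  // Walk from TagX's base type toward the root, at each step following the
  // member edge covering Offset and rebasing Offset into that member
  // (getParent adjusts Offset in place).
  static bool reachesWithSameOffset(TBAAStructTagNode TagX,
                                    TBAAStructTagNode TagY) {
    uint64_t Offset = TagX.getOffset();
    TBAAStructTypeNode T(TagX.getBaseType());
    while (T.getNode()) {
      if (T.getNode() == TagY.getBaseType())
        return Offset == TagY.getOffset(); // equal offsets => may alias
      T = T.getParent(Offset);
    }
    return false; // reached the root without meeting TagY's base type
  }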
@@ -71,7 +135,6 @@ using namespace llvm;
// achieved by stripping the !tbaa tags from IR, but this option is sometimes
// more convenient.
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
-static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
namespace {
/// TBAANode - This is a simple wrapper around an MDNode which provides a
@@ -168,8 +231,12 @@ namespace {
if (Node->getNumOperands() < 2)
return TBAAStructTypeNode();
- // Special handling for a scalar type node.
+ // Fast path for a scalar type node and a struct type node with a single
+ // field.
if (Node->getNumOperands() <= 3) {
+ uint64_t Cur = Node->getNumOperands() == 2 ? 0 :
+ cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ Offset -= Cur;
MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
if (!P)
return TBAAStructTypeNode();
@@ -259,12 +326,21 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AliasAnalysis::getAnalysisUsage(AU);
}
+/// Check the first operand of the tbaa tag node. If it is an MDNode, we treat
+/// it as struct-path aware TBAA format; otherwise, we treat it as scalar TBAA
+/// format.
+static bool isStructPathTBAA(const MDNode *MD) {
+ // Anonymous TBAA root starts with a MDNode and dragonegg uses it as
+  // An anonymous TBAA root starts with an MDNode, and dragonegg uses it as
+ return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
+}
+
/// Aliases - Test whether the type represented by A may alias the
/// type represented by B.
bool
TypeBasedAliasAnalysis::Aliases(const MDNode *A,
const MDNode *B) const {
- if (EnableStructPathTBAA)
+ if (isStructPathTBAA(A))
return PathAliases(A, B);
// Keep track of the root node for A and B.
@@ -397,8 +473,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
- (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
return true;
return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
@@ -414,8 +490,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
- (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
Min = OnlyReadsMemory;
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
@@ -458,6 +534,25 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
return AliasAnalysis::getModRefInfo(CS1, CS2);
}
+bool MDNode::isTBAAVtableAccess() const {
+ if (!isStructPathTBAA(this)) {
+ if (getNumOperands() < 1) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ return false;
+ }
+
+ // For struct-path aware TBAA, we use the access type of the tag.
+ if (getNumOperands() < 2) return false;
+ MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
+ if (!Tag) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ return false;
+}
+
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
if (!A || !B)
return NULL;
@@ -466,7 +561,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return A;
// For struct-path aware TBAA, we use the access type of the tag.
- if (EnableStructPathTBAA) {
+ bool StructPath = isStructPathTBAA(A);
+ if (StructPath) {
A = cast_or_null<MDNode>(A->getOperand(1));
if (!A) return 0;
B = cast_or_null<MDNode>(B->getOperand(1));
@@ -499,7 +595,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
--IA;
--IB;
}
- if (!EnableStructPathTBAA)
+ if (!StructPath)
return Ret;
if (!Ret)
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 4591af8..e39ee62 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
@@ -39,8 +40,8 @@ const unsigned MaxDepth = 6;
static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
if (unsigned BitWidth = Ty->getScalarSizeInBits())
return BitWidth;
- assert(isa<PointerType>(Ty) && "Expected a pointer type!");
- return TD ? TD->getPointerSizeInBits() : 0;
+
+ return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
}
static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
@@ -629,9 +630,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Value *Index = I->getOperand(i);
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
- if (!TD) return;
- const StructLayout *SL = TD->getStructLayout(STy);
+ if (!TD)
+ return;
+
+      // Handle the case where the index is a vector zeroinitializer.
+ Constant *CIndex = cast<Constant>(Index);
+ if (CIndex->isZeroValue())
+ continue;
+
+ if (CIndex->getType()->isVectorTy())
+ Index = CIndex->getSplatValue();
+
unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
uint64_t Offset = SL->getElementOffset(Idx);
TrailZ = std::min<unsigned>(TrailZ,
countTrailingZeros(Offset));
@@ -749,7 +760,6 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
- case Intrinsic::x86_sse42_crc32_64_8:
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
break;
@@ -1704,20 +1714,24 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
/// it can be expressed as a base pointer plus a constant offset. Return the
/// base and offset to the caller.
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
- const DataLayout *TD) {
+ const DataLayout *DL) {
// Without DataLayout, conservatively assume 64-bit offsets, which is
// the widest we support.
- unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+ unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(Ptr->getType()) : 64;
APInt ByteOffset(BitWidth, 0);
while (1) {
if (Ptr->getType()->isVectorTy())
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt GEPOffset(BitWidth, 0);
- if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset))
- break;
- ByteOffset += GEPOffset;
+ if (DL) {
+ APInt GEPOffset(BitWidth, 0);
+ if (!GEP->accumulateConstantOffset(*DL, GEPOffset))
+ break;
+
+ ByteOffset += GEPOffset;
+ }
+
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
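
A usage sketch of the helper; after this change the accumulated offset's bit width follows the pointer's own address space (getPointerTypeSizeInBits) rather than the default address space's pointer size:

  #include "llvm/Analysis/ValueTracking.h"
  using namespace llvm;

  // Hypothetical caller: split Ptr into a base plus a constant byte offset.
  static Value *splitPointer(Value *Ptr, const DataLayout *DL,
                             int64_t &ByteOff) {
    ByteOff = 0;
    // Returns Ptr unchanged (ByteOff == 0) when nothing can be stripped,
    // e.g. for vector-of-pointer values.
    return GetPointerBaseWithConstantOffset(Ptr, ByteOff, DL);
  }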
@@ -2050,7 +2064,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
/// isKnownNonNull - Return true if we know that the specified value is never
/// null.
-bool llvm::isKnownNonNull(const Value *V) {
+bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
@@ -2061,5 +2075,10 @@ bool llvm::isKnownNonNull(const Value *V) {
// Global values are not null unless extern weak.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return !GV->hasExternalWeakLinkage();
+
+ // operator new never returns null.
+ if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
+ return true;
+
return false;
}
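
A closing usage sketch: with the new TargetLibraryInfo parameter, calls to operator new join allocas and non-extern-weak globals as values known to be non-null:

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  using namespace llvm;

  // Hypothetical predicate: a null check on V can be dropped when V is
  // provably non-null (alloca, non-extern-weak global, operator new call).
  static bool canOmitNullCheck(const Value *V, const TargetLibraryInfo *TLI) {
    return isKnownNonNull(V, TLI);
  }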