| author | Nowar Gu <nowar100@gmail.com> | 2011-06-17 14:29:24 +0800 |
|---|---|---|
| committer | Nowar Gu <nowar100@gmail.com> | 2011-06-20 15:49:07 +0800 |
| commit | 907af0f20f58f2ea26da7ea64e1f094cd6880db7 (patch) | |
| tree | 02007757de416c561df174d582205cebfa582801 | /lib/Analysis |
| parent | 1d4f9a57447faa0142a1d0301e5ce550cfe60c4f (diff) | |
| parent | ec324e5ae44025c6bdb930b78198f30f807e355b (diff) | |
Merge upstream to r133240 on Fri, 17 Jun 2011.
Conflicts:
lib/CodeGen/AsmPrinter/AsmPrinter.cpp
lib/Target/ARM/ARMCodeEmitter.cpp
Diffstat (limited to 'lib/Analysis')
31 files changed, 1403 insertions, 335 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index be02ddb..c189a00 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -86,14 +86,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (onlyAccessesArgPointees(MRB)) { bool doesAlias = false; - if (doesAccessArgPointees(MRB)) + if (doesAccessArgPointees(MRB)) { + MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) - if (!isNoAlias(Location(*AI), Loc)) { + AI != AE; ++AI) { + const Value *Arg = *AI; + if (!Arg->getType()->isPointerTy()) + continue; + Location CSLoc(Arg, UnknownSize, CSTag); + if (!isNoAlias(CSLoc, Loc)) { doesAlias = true; break; } - + } + } if (!doesAlias) return NoModRef; } @@ -138,13 +144,19 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { // CS2's arguments. if (onlyAccessesArgPointees(CS2B)) { AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS2B)) + if (doesAccessArgPointees(CS2B)) { + MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { - R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask); + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS2Loc(Arg, UnknownSize, CS2Tag); + R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask); if (R == Mask) break; } + } return R; } @@ -152,13 +164,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { // any of the memory referenced by CS1's arguments. If not, return NoModRef. if (onlyAccessesArgPointees(CS1B)) { AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS1B)) + if (doesAccessArgPointees(CS1B)) { + MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator - I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) - if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) { + I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS1Loc(Arg, UnknownSize, CS1Tag); + if (getModRefInfo(CS2, CS1Loc) != NoModRef) { R = Mask; break; } + } + } if (R == NoModRef) return R; } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 3a46976..2ed6949 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -602,6 +602,10 @@ void AliasSetTracker::ASTCallbackVH::deleted() { // this now dangles! 
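The AliasAnalysis.cpp hunks above teach getModRefInfo to skip non-pointer call arguments and to query each pointer argument as a Location of unknown size carrying the call site's TBAA tag. Here is a minimal standalone sketch of that loop; Value, MDNode, Location, isNoAlias, and argsMayAliasLoc are plain C++ stand-ins invented for illustration, not LLVM's real classes:

```cpp
#include <cstdint>
#include <vector>

static const uint64_t UnknownSize = ~uint64_t(0);

struct Value { bool IsPointer; };
struct MDNode {};                 // stand-in for a TBAA tag node

struct Location {                 // (Ptr, Size, TBAATag), as in the patch
  const Value *Ptr;
  uint64_t Size;
  const MDNode *TBAATag;
};

// Dummy oracle so the sketch is self-contained; LLVM would consult its
// alias-analysis chain here.
static bool isNoAlias(const Location &, const Location &) { return false; }

// Mirrors the patched loop: skip non-pointer arguments, and query each
// pointer argument as an unknown-size Location tagged with the call
// site's TBAA metadata.
bool argsMayAliasLoc(const std::vector<const Value *> &Args,
                     const MDNode *CSTag, const Location &Loc) {
  for (std::size_t i = 0; i != Args.size(); ++i) {
    const Value *Arg = Args[i];
    if (!Arg->IsPointer)
      continue;                   // the new early-out
    Location CSLoc = {Arg, UnknownSize, CSTag};
    if (!isNoAlias(CSLoc, Loc))
      return true;                // doesAlias = true; break;
  }
  return false;
}
```

The early `continue` and the TBAA tag are the functional change: previously every argument, pointer or not, was wrapped in a default Location with no TBAA information.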
} +void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) { + AST->copyValue(getValPtr(), V); +} + AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast) : CallbackVH(V), AST(ast) {} diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 6ebe100..e57ba78 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -23,6 +23,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeAliasSetPrinterPass(Registry); initializeNoAAPass(Registry); initializeBasicAliasAnalysisPass(Registry); + initializeBranchProbabilityInfoPass(Registry); initializeCFGViewerPass(Registry); initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index f7bcd9e..8330ea7 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -281,17 +281,20 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, continue; } - if (const Instruction *I = dyn_cast<Instruction>(V)) - // TODO: Get a DominatorTree and use it here. - if (const Value *Simplified = - SimplifyInstruction(const_cast<Instruction *>(I), TD)) { - V = Simplified; - continue; - } - const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); - if (GEPOp == 0) + if (GEPOp == 0) { + // If it's not a GEP, hand it off to SimplifyInstruction to see if it + // can come up with something. This matches what GetUnderlyingObject does. + if (const Instruction *I = dyn_cast<Instruction>(V)) + // TODO: Get a DominatorTree and use it here. + if (const Value *Simplified = + SimplifyInstruction(const_cast<Instruction *>(I), TD)) { + V = Simplified; + continue; + } + return V; + } // Don't attempt to analyze GEPs over unsized objects. if (!cast<PointerType>(GEPOp->getOperand(0)->getType()) @@ -350,7 +353,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, Scale *= IndexScale.getSExtValue(); - // If we already had an occurrance of this index variable, merge this + // If we already had an occurrence of this index variable, merge this // scale into it. For example, we want to handle: // A[x][x] -> x*16 + x*4 -> x*20 // This also ensures that 'x' only appears in the index list once. @@ -448,7 +451,13 @@ namespace { /// BasicAliasAnalysis - This is the primary alias analysis implementation. struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { static char ID; // Class identification, replacement for typeinfo - BasicAliasAnalysis() : ImmutablePass(ID) { + BasicAliasAnalysis() : ImmutablePass(ID), + // AliasCache rarely has more than 1 or 2 elements, + // so start it off fairly small so that clear() + // doesn't have to tromp through 64 (the default) + // elements on each alias query. This really wants + // something like a SmallDenseMap. 
+ AliasCache(8) { initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); } @@ -462,12 +471,12 @@ namespace { virtual AliasResult alias(const Location &LocA, const Location &LocB) { - assert(Visited.empty() && "Visited must be cleared after use!"); + assert(AliasCache.empty() && "AliasCache must be cleared after use!"); assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && "BasicAliasAnalysis doesn't support interprocedural queries."); AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, LocB.Ptr, LocB.Size, LocB.TBAATag); - Visited.clear(); + AliasCache.clear(); return Alias; } @@ -503,7 +512,12 @@ namespace { } private: - // Visited - Track instructions visited by a aliasPHI, aliasSelect(), and aliasGEP(). + // AliasCache - Track alias queries to guard against recursion. + typedef std::pair<Location, Location> LocPair; + typedef DenseMap<LocPair, AliasResult> AliasCacheTy; + AliasCacheTy AliasCache; + + // Visited - Track instructions visited by pointsToConstantMemory. SmallPtrSet<const Value*, 16> Visited; // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP @@ -680,9 +694,12 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, unsigned ArgNo = 0; for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); CI != CE; ++CI, ++ArgNo) { - // Only look at the no-capture pointer arguments. + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. if (!(*CI)->getType()->isPointerTy() || - !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)) + (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && + !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) continue; // If this is a no-capture pointer argument, see if we can tell that it @@ -779,6 +796,26 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return NoModRef; break; } + case Intrinsic::arm_neon_vld1: { + // LLVM's vld1 and vst1 intrinsics currently only support a single + // vector register. + uint64_t Size = + TD ? TD->getTypeStoreSize(II->getType()) : UnknownSize; + if (isNoAlias(Location(II->getArgOperand(0), Size, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + case Intrinsic::arm_neon_vst1: { + uint64_t Size = + TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize; + if (isNoAlias(Location(II->getArgOperand(0), Size, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } } // The AliasAnalysis base class has some smarts, lets use them. @@ -796,13 +833,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, const MDNode *V2TBAAInfo, const Value *UnderlyingV1, const Value *UnderlyingV2) { - // If this GEP has been visited before, we're on a use-def cycle. - // Such cycles are only valid when PHI nodes are involved or in unreachable - // code. The visitPHI function catches cycles containing PHIs, but there - // could still be a cycle without PHIs in unreachable code. 
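In the hunk above, the per-query Visited set gives way to an AliasCache keyed by pairs of Locations, and alias() now brackets each top-level query with an emptiness assert and a clear(). A sketch of that lifecycle under simplified types (std::map stands in for LLVM's DenseMap, and aliasCheck is stubbed out):

```cpp
#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

typedef std::pair<const void *, uint64_t> Loc;   // (pointer, size)
typedef std::pair<Loc, Loc> LocPair;

struct BasicAAModel {
  // The patch sizes the real DenseMap at 8 buckets because clear() walks
  // every bucket and queries rarely cache more than a couple of entries.
  std::map<LocPair, AliasResult> AliasCache;

  // Stub for the recursive worker that populates the cache.
  AliasResult aliasCheck(const Loc &, const Loc &) { return MayAlias; }

  AliasResult alias(const Loc &A, const Loc &B) {
    assert(AliasCache.empty() && "AliasCache must be cleared after use!");
    AliasResult R = aliasCheck(A, B);
    AliasCache.clear();           // one top-level query, one cache lifetime
    return R;
  }
};
```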
- if (!Visited.insert(GEP1)) - return MayAlias; - int64_t GEP1BaseOffset; SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; @@ -883,7 +913,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) return MustAlias; - // If there is a difference betwen the pointers, but the difference is + // If there is a difference between the pointers, but the difference is // less than the size of the associated memory object, then we know // that the objects are partially overlapping. if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) { @@ -920,7 +950,30 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, return NoAlias; } - return MayAlias; + // Statically, we can see that the base objects are the same, but the + // pointers have dynamic offsets which we can't resolve. And none of our + // little tricks above worked. + // + // TODO: Returning PartialAlias instead of MayAlias is a mild hack; the + // practical effect of this is protecting TBAA in the case of dynamic + // indices into arrays of unions. An alternative way to solve this would + // be to have clang emit extra metadata for unions and/or union accesses. + // A union-specific solution wouldn't handle the problem for malloc'd + // memory however. + return PartialAlias; +} + +static AliasAnalysis::AliasResult +MergeAliasResults(AliasAnalysis::AliasResult A, AliasAnalysis::AliasResult B) { + // If the results agree, take it. + if (A == B) + return A; + // A mix of PartialAlias and MustAlias is PartialAlias. + if ((A == AliasAnalysis::PartialAlias && B == AliasAnalysis::MustAlias) || + (B == AliasAnalysis::PartialAlias && A == AliasAnalysis::MustAlias)) + return AliasAnalysis::PartialAlias; + // Otherwise, we don't know anything. + return AliasAnalysis::MayAlias; } /// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select @@ -930,13 +983,6 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, const MDNode *SITBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo) { - // If this select has been visited before, we're on a use-def cycle. - // Such cycles are only valid when PHI nodes are involved or in unreachable - // code. The visitPHI function catches cycles containing PHIs, but there - // could still be a cycle without PHIs in unreachable code. - if (!Visited.insert(SI)) - return MayAlias; - // If the values are Selects with the same condition, we can do a more precise // check: just check for aliases between the values on corresponding arms. if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) @@ -949,9 +995,7 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, AliasResult ThisAlias = aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo, SI2->getFalseValue(), V2Size, V2TBAAInfo); - if (ThisAlias != Alias) - return MayAlias; - return Alias; + return MergeAliasResults(ThisAlias, Alias); } // If both arms of the Select node NoAlias or MustAlias V2, then returns @@ -961,16 +1005,9 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, if (Alias == MayAlias) return MayAlias; - // If V2 is visited, the recursive case will have been caught in the - // above aliasCheck call, so these subsequent calls to aliasCheck - // don't need to assume that V2 is being visited recursively. 
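MergeAliasResults, introduced above, replaces the old "any disagreement means MayAlias" rule with a small lattice merge so that a PartialAlias/MustAlias mix survives as PartialAlias. Its logic restated as a free-standing function:

```cpp
enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

// Agreement wins; PartialAlias absorbs MustAlias; anything else is MayAlias.
AliasResult mergeAliasResults(AliasResult A, AliasResult B) {
  if (A == B)
    return A;
  if ((A == PartialAlias && B == MustAlias) ||
      (B == PartialAlias && A == MustAlias))
    return PartialAlias;
  return MayAlias;
}
```

aliasSelect and aliasPHI now fold each arm's result through this merge instead of bailing out to MayAlias on the first mismatch.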
- Visited.erase(V2); - AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo); - if (ThisAlias != Alias) - return MayAlias; - return Alias; + return MergeAliasResults(ThisAlias, Alias); } // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction @@ -980,10 +1017,6 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, const MDNode *PNTBAAInfo, const Value *V2, uint64_t V2Size, const MDNode *V2TBAAInfo) { - // The PHI node has already been visited, avoid recursion any further. - if (!Visited.insert(PN)) - return MayAlias; - // If the values are PHIs in the same block, we can do a more precise // as well as efficient check: just check for aliases between the values // on corresponding edges. @@ -1000,8 +1033,9 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), V2Size, V2TBAAInfo); - if (ThisAlias != Alias) - return MayAlias; + Alias = MergeAliasResults(ThisAlias, Alias); + if (Alias == MayAlias) + break; } return Alias; } @@ -1032,15 +1066,11 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { Value *V = V1Srcs[i]; - // If V2 is visited, the recursive case will have been caught in the - // above aliasCheck call, so these subsequent calls to aliasCheck - // don't need to assume that V2 is being visited recursively. - Visited.erase(V2); - AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, V, PNSize, PNTBAAInfo); - if (ThisAlias != Alias || ThisAlias == MayAlias) - return MayAlias; + Alias = MergeAliasResults(ThisAlias, Alias); + if (Alias == MayAlias) + break; } return Alias; @@ -1125,6 +1155,17 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD))) return NoAlias; + // Check the cache before climbing up use-def chains. This also terminates + // otherwise infinitely recursive queries. + LocPair Locs(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + if (V1 > V2) + std::swap(Locs.first, Locs.second); + std::pair<AliasCacheTy::iterator, bool> Pair = + AliasCache.insert(std::make_pair(Locs, MayAlias)); + if (!Pair.second) + return Pair.first->second; + // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the // GEP can't simplify, we don't even look at the PHI cases. 
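The aliasCheck hunk above is the heart of the new caching scheme: the location pair is canonicalized by pointer order, and a MayAlias entry is inserted *before* recursing so a query that cycles back to itself hits its own placeholder and terminates. A self-contained sketch of the pattern (names and the std::map are illustrative; LLVM uses DenseMap and its Location type):

```cpp
#include <cstdint>
#include <map>
#include <utility>

enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };
typedef std::pair<const void *, uint64_t> Loc;   // (pointer, size)
typedef std::pair<Loc, Loc> LocPair;

// Stub standing in for the real recursive use-def walk.
static AliasResult expensiveAliasCheck(const Loc &, const Loc &) {
  return NoAlias;
}

AliasResult cachedAliasCheck(std::map<LocPair, AliasResult> &Cache,
                             Loc A, Loc B) {
  // Canonicalize so (A,B) and (B,A) share one slot, mirroring
  // "if (V1 > V2) std::swap(Locs.first, Locs.second)".
  if (A.first > B.first)
    std::swap(A, B);
  LocPair Key(A, B);
  // Seed with MayAlias before recursing; a cyclic query finds it here.
  std::pair<std::map<LocPair, AliasResult>::iterator, bool> Ins =
      Cache.insert(std::make_pair(Key, MayAlias));
  if (!Ins.second)
    return Ins.first->second;
  return Cache[Key] = expensiveAliasCheck(A, B);
}
```

This is also why every early return in aliasCheck becomes `return AliasCache[Locs] = Result;`: the placeholder must be overwritten with the real answer on the way out.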
if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) { @@ -1134,7 +1175,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, } if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2); - if (Result != MayAlias) return Result; + if (Result != MayAlias) return AliasCache[Locs] = Result; } if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { @@ -1144,7 +1185,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (const PHINode *PN = dyn_cast<PHINode>(V1)) { AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo); - if (Result != MayAlias) return Result; + if (Result != MayAlias) return AliasCache[Locs] = Result; } if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { @@ -1154,7 +1195,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo, V2, V2Size, V2TBAAInfo); - if (Result != MayAlias) return Result; + if (Result != MayAlias) return AliasCache[Locs] = Result; } // If both pointers are pointing into the same object and one of them @@ -1163,8 +1204,10 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, if (TD && O1 == O2) if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) || (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD))) - return PartialAlias; + return AliasCache[Locs] = PartialAlias; - return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo), - Location(V2, V2Size, V2TBAAInfo)); + AliasResult Result = + AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo), + Location(V2, V2Size, V2TBAAInfo)); + return AliasCache[Locs] = Result; } diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp new file mode 100644 index 0000000..15059c7 --- /dev/null +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -0,0 +1,358 @@ +//===-- BranchProbabilityInfo.cpp - Branch Probability Analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Instructions.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", + "Branch Probability Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", + "Branch Probability Analysis", false, true) + +char BranchProbabilityInfo::ID = 0; + +namespace { +// Please note that BranchProbabilityAnalysis is not a FunctionPass. +// It is created by BranchProbabilityInfo (which is a FunctionPass), which +// provides a clear interface. Thanks to that, all heuristics and other +// private methods are hidden in the .cpp file. +class BranchProbabilityAnalysis { + + typedef std::pair<BasicBlock *, BasicBlock *> Edge; + + DenseMap<Edge, uint32_t> *Weights; + + BranchProbabilityInfo *BP; + + LoopInfo *LI; + + + // Weights are for internal use only. They are used by heuristics to help to + // estimate edges' probability. Example: + // + // Using "Loop Branch Heuristics" we predict weights of edges for the + // block BB2. + // ... 
+ // | + // V + // BB1<-+ + // | | + // | | (Weight = 128) + // V | + // BB2--+ + // | + // | (Weight = 4) + // V + // BB3 + // + // Probability of the edge BB2->BB1 = 128 / (128 + 4) = 0.9696.. + // Probability of the edge BB2->BB3 = 4 / (128 + 4) = 0.0303.. + + static const uint32_t LBH_TAKEN_WEIGHT = 128; + static const uint32_t LBH_NONTAKEN_WEIGHT = 4; + + // Standard weight value. Used when none of the heuristics set weight for + // the edge. + static const uint32_t NORMAL_WEIGHT = 16; + + // Minimum weight of an edge. Please note, that weight is NEVER 0. + static const uint32_t MIN_WEIGHT = 1; + + // Return TRUE if BB leads directly to a Return Instruction. + static bool isReturningBlock(BasicBlock *BB) { + SmallPtrSet<BasicBlock *, 8> Visited; + + while (true) { + TerminatorInst *TI = BB->getTerminator(); + if (isa<ReturnInst>(TI)) + return true; + + if (TI->getNumSuccessors() > 1) + break; + + // It is unreachable block which we can consider as a return instruction. + if (TI->getNumSuccessors() == 0) + return true; + + Visited.insert(BB); + BB = TI->getSuccessor(0); + + // Stop if cycle is detected. + if (Visited.count(BB)) + return false; + } + + return false; + } + + // Multiply Edge Weight by two. + void incEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { + uint32_t Weight = BP->getEdgeWeight(Src, Dst); + uint32_t MaxWeight = getMaxWeightFor(Src); + + if (Weight * 2 > MaxWeight) + BP->setEdgeWeight(Src, Dst, MaxWeight); + else + BP->setEdgeWeight(Src, Dst, Weight * 2); + } + + // Divide Edge Weight by two. + void decEdgeWeight(BasicBlock *Src, BasicBlock *Dst) { + uint32_t Weight = BP->getEdgeWeight(Src, Dst); + + assert(Weight > 0); + if (Weight / 2 < MIN_WEIGHT) + BP->setEdgeWeight(Src, Dst, MIN_WEIGHT); + else + BP->setEdgeWeight(Src, Dst, Weight / 2); + } + + + uint32_t getMaxWeightFor(BasicBlock *BB) const { + return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); + } + +public: + BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W, + BranchProbabilityInfo *BP, LoopInfo *LI) + : Weights(W), BP(BP), LI(LI) { + } + + // Return Heuristics + void calcReturnHeuristics(BasicBlock *BB); + + // Pointer Heuristics + void calcPointerHeuristics(BasicBlock *BB); + + // Loop Branch Heuristics + void calcLoopBranchHeuristics(BasicBlock *BB); + + bool runOnFunction(Function &F); +}; +} // end anonymous namespace + +// Calculate Edge Weights using "Return Heuristics". Predict a successor which +// leads directly to Return Instruction will not be taken. +void BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ + if (BB->getTerminator()->getNumSuccessors() == 1) + return; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + if (isReturningBlock(Succ)) { + decEdgeWeight(BB, Succ); + } + } +} + +// Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion +// between two pointer or pointer and NULL will fail. 
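isReturningBlock, shown above, walks single-successor chains until it proves the block funnels straight into a return. A toy re-implementation over a hypothetical Block type, showing the cycle guard and the "no successors counts as a return" case:

```cpp
#include <set>
#include <vector>

struct Block {
  bool EndsInReturn;
  std::vector<Block *> Succs;   // empty models an unreachable terminator
};

bool leadsStraightToReturn(Block *BB) {
  std::set<Block *> Visited;
  while (true) {
    if (BB->EndsInReturn)
      return true;
    if (BB->Succs.size() > 1)   // a branch point: no longer "direct"
      break;
    if (BB->Succs.empty())
      return true;              // unreachable block, treated like a return
    Visited.insert(BB);
    BB = BB->Succs[0];
    if (Visited.count(BB))
      return false;             // cycle detected, stop
  }
  return false;
}
```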
+void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { + BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return; + + Value *Cond = BI->getCondition(); + ICmpInst *CI = dyn_cast<ICmpInst>(Cond); + if (!CI) + return; + + Value *LHS = CI->getOperand(0); + + if (!LHS->getType()->isPointerTy()) + return; + + assert(CI->getOperand(1)->getType()->isPointerTy()); + + BasicBlock *Taken = BI->getSuccessor(0); + BasicBlock *NonTaken = BI->getSuccessor(1); + + // p != 0 -> isProb = true + // p == 0 -> isProb = false + // p != q -> isProb = true + // p == q -> isProb = false; + bool isProb = !CI->isEquality(); + if (!isProb) + std::swap(Taken, NonTaken); + + incEdgeWeight(BB, Taken); + decEdgeWeight(BB, NonTaken); +} + +// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges +// as taken, exiting edges as not-taken. +void BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { + uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); + + Loop *L = LI->getLoopFor(BB); + if (!L) + return; + + SmallVector<BasicBlock *, 8> BackEdges; + SmallVector<BasicBlock *, 8> ExitingEdges; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + Loop *SuccL = LI->getLoopFor(Succ); + if (SuccL != L) + ExitingEdges.push_back(Succ); + else if (Succ == L->getHeader()) + BackEdges.push_back(Succ); + } + + if (uint32_t numBackEdges = BackEdges.size()) { + uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; + if (backWeight < NORMAL_WEIGHT) + backWeight = NORMAL_WEIGHT; + + for (SmallVector<BasicBlock *, 8>::iterator EI = BackEdges.begin(), + EE = BackEdges.end(); EI != EE; ++EI) { + BasicBlock *Back = *EI; + BP->setEdgeWeight(BB, Back, backWeight); + } + } + + uint32_t numExitingEdges = ExitingEdges.size(); + if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) { + uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges; + if (exitWeight < MIN_WEIGHT) + exitWeight = MIN_WEIGHT; + + for (SmallVector<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(), + EE = ExitingEdges.end(); EI != EE; ++EI) { + BasicBlock *Exiting = *EI; + BP->setEdgeWeight(BB, Exiting, exitWeight); + } + } +} + +bool BranchProbabilityAnalysis::runOnFunction(Function &F) { + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + + // Only LBH uses setEdgeWeight method. + calcLoopBranchHeuristics(BB); + + // PH and RH use only incEdgeWeight and decEwdgeWeight methods to + // not efface LBH results. 
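calcPointerHeuristics above boils down to one bit: equality compares of pointers are predicted to fail and inequality compares to succeed, implemented by optionally swapping the successor pair before adjusting weights. A toy version over hypothetical types:

```cpp
#include <utility>

struct Succ { const char *Name; };

// IsInequality: true for p != q / p != 0, false for p == q / p == 0.
// Returns (predicted-taken, predicted-not-taken).
std::pair<Succ, Succ> predictPointerBranch(bool IsInequality,
                                           Succ Taken, Succ NonTaken) {
  bool IsProb = IsInequality;     // "two pointers usually differ"
  if (!IsProb)
    std::swap(Taken, NonTaken);
  return std::make_pair(Taken, NonTaken);  // first is then incEdgeWeight'd,
                                           // second decEdgeWeight'd
}
```

For scale: incEdgeWeight doubles a weight (clamped to UINT32_MAX divided by the successor count) and decEdgeWeight halves it (clamped to MIN_WEIGHT = 1); with the loop-branch weights above, a back edge gets probability 128 / (128 + 4) ≈ 0.97, and isEdgeHot later declares an edge hot when its weight exceeds 4/5 of the block's total.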
+ calcPointerHeuristics(BB); + calcReturnHeuristics(BB); + } + + return false; +} + + +bool BranchProbabilityInfo::runOnFunction(Function &F) { + LoopInfo &LI = getAnalysis<LoopInfo>(); + BranchProbabilityAnalysis BPA(&Weights, this, &LI); + return BPA.runOnFunction(F); +} + +uint32_t BranchProbabilityInfo::getSumForBlock(BasicBlock *BB) const { + uint32_t Sum = 0; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + } + + return Sum; +} + +bool BranchProbabilityInfo::isEdgeHot(BasicBlock *Src, BasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + uint32_t Weight = getEdgeWeight(Src, Dst); + uint32_t Sum = getSumForBlock(Src); + + // FIXME: Implement BranchProbability::compare then change this code to + // compare this BranchProbability against a static "hot" BranchProbability. + return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; +} + +BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { + uint32_t Sum = 0; + uint32_t MaxWeight = 0; + BasicBlock *MaxSucc = 0; + + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + BasicBlock *Succ = *I; + uint32_t Weight = getEdgeWeight(BB, Succ); + uint32_t PrevSum = Sum; + + Sum += Weight; + assert(Sum > PrevSum); (void) PrevSum; + + if (Weight > MaxWeight) { + MaxWeight = Weight; + MaxSucc = Succ; + } + } + + // FIXME: Use BranchProbability::compare. + if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4) + return MaxSucc; + + return 0; +} + +// Return edge's weight. If can't find it, return DEFAULT_WEIGHT value. +uint32_t +BranchProbabilityInfo::getEdgeWeight(BasicBlock *Src, BasicBlock *Dst) const { + Edge E(Src, Dst); + DenseMap<Edge, uint32_t>::const_iterator I = Weights.find(E); + + if (I != Weights.end()) + return I->second; + + return DEFAULT_WEIGHT; +} + +void BranchProbabilityInfo::setEdgeWeight(BasicBlock *Src, BasicBlock *Dst, + uint32_t Weight) { + Weights[std::make_pair(Src, Dst)] = Weight; + DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> " + << Dst->getNameStr() << " weight to " << Weight + << (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n")); +} + + +BranchProbability BranchProbabilityInfo:: +getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const { + + uint32_t N = getEdgeWeight(Src, Dst); + uint32_t D = getSumForBlock(Src); + + return BranchProbability(N, D); +} + +raw_ostream & +BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src, + BasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? 
" [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 6be5617..1a975bf 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMAnalysis AliasSetTracker.cpp Analysis.cpp BasicAliasAnalysis.cpp + BranchProbabilityInfo.cpp CFGPrinter.cpp CaptureTracking.cpp ConstantFolding.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 42a54d9..b2c27d1 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index c6aad0c..08a6065 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/Operator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" @@ -1084,7 +1085,7 @@ llvm::canConstantFoldCallTo(const Function *F) { case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': - return Name == "exp"; + return Name == "exp" || Name == "exp2"; case 'f': return Name == "fabs" || Name == "fmod" || Name == "floor"; case 'l': @@ -1220,6 +1221,12 @@ llvm::ConstantFoldCall(Function *F, case 'e': if (Name == "exp") return ConstantFoldFP(exp, V, Ty); + + if (Name == "exp2") { + // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a + // C99 library. + return ConstantFoldBinaryFP(pow, 2.0, V, Ty); + } break; case 'f': if (Name == "fabs") diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index 108d2d2..ef5d03a 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -50,7 +50,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, MDString::get(VMContext, Flags), ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) }; - TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); + + // Create a named metadata so that it is easier to find cu in a module. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); + NMD->addOperand(TheCU); } /// createFile - Create a file descriptor to hold debugging information @@ -63,7 +67,7 @@ DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { MDString::get(VMContext, Directory), TheCU }; - return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIFile(MDNode::get(VMContext, Elts)); } /// createEnumerator - Create a single enumerator value. 
@@ -73,7 +77,7 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { MDString::get(VMContext, Name), ConstantInt::get(Type::getInt64Ty(VMContext), Val) }; - return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIEnumerator(MDNode::get(VMContext, Elts)); } /// createBasicType - Create debugging information entry for a basic @@ -95,7 +99,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createQaulifiedType - Create debugging information entry for a qualified @@ -114,7 +118,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags FromTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createPointerType - Create debugging information entry for a pointer. @@ -133,7 +137,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags PointeeTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createReferenceType - Create debugging information entry for a reference. @@ -151,17 +155,17 @@ DIType DIBuilder::createReferenceType(DIType RTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags RTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createTypedef - Create debugging information entry for a typedef. DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, - unsigned LineNo) { + unsigned LineNo, DIDescriptor Context) { // typedefs are encoded in DIDerivedType format. assert(Ty.Verify() && "Invalid typedef type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_typedef), - Ty.getContext(), + Context, MDString::get(VMContext, Name), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), @@ -171,7 +175,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags Ty }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createFriend - Create debugging information entry for a 'friend'. @@ -191,7 +195,7 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags FriendTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createInheritance - Create debugging information entry to establish @@ -211,7 +215,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), BaseTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createMemberType - Create debugging information entry for a member. 
@@ -233,7 +237,36 @@ DIType DIBuilder::createMemberType(StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), Ty }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCIVar - Create debugging information entry for Objective-C +/// instance variable. +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, StringRef PropertyName, + StringRef GetterName, StringRef SetterName, + unsigned PropertyAttributes) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + File, // Or TheCU ? Ty ? + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + MDString::get(VMContext, PropertyName), + MDString::get(VMContext, GetterName), + MDString::get(VMContext, SetterName), + ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes) + }; + return DIType(MDNode::get(VMContext, Elts)); } /// createClassType - Create debugging information entry for a class. @@ -260,7 +293,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, VTableHoder, TemplateParams }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createTemplateTypeParameter - Create debugging information for template @@ -278,8 +311,7 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) }; - return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0], - array_lengthof(Elts))); + return DITemplateTypeParameter(MDNode::get(VMContext, Elts)); } /// createTemplateValueParameter - Create debugging information for template @@ -299,8 +331,7 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) }; - return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0], - array_lengthof(Elts))); + return DITemplateValueParameter(MDNode::get(VMContext, Elts)); } /// createStructType - Create debugging information entry for a struct. @@ -325,7 +356,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createUnionType - Create debugging information entry for an union. @@ -350,7 +381,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createSubroutineType - Create subroutine type. 
@@ -371,7 +402,7 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createEnumerationType - Create debugging information entry for an @@ -396,7 +427,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); NMD->addOperand(Node); return DIType(Node); @@ -421,7 +452,7 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createVectorType - Create debugging information entry for a vector. @@ -443,7 +474,7 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createArtificialType - Create a new DIType with "artificial" flag set. @@ -467,7 +498,7 @@ DIType DIBuilder::createArtificialType(DIType Ty) { // Flags are stored at this slot. Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); - return DIType(MDNode::get(VMContext, Elts.data(), Elts.size())); + return DIType(MDNode::get(VMContext, Elts)); } /// retainType - Retain DIType in a module even if it is not referenced @@ -483,7 +514,7 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) }; - return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1)); + return DIDescriptor(MDNode::get(VMContext, Elts)); } /// createTemporaryType - Create a temporary forward-declared type. @@ -491,7 +522,7 @@ DIType DIBuilder::createTemporaryType() { // Give the temporary MDNode a tag. It doesn't matter what tag we // use here as long as DIType accepts it. Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + MDNode *Node = MDNode::getTemporary(VMContext, Elts); return DIType(Node); } @@ -505,17 +536,17 @@ DIType DIBuilder::createTemporaryType(DIFile F) { NULL, F }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + MDNode *Node = MDNode::getTemporary(VMContext, Elts); return DIType(Node); } /// getOrCreateArray - Get a DIArray, create one if required. 
-DIArray DIBuilder::getOrCreateArray(Value *const *Elements, unsigned NumElements) { - if (NumElements == 0) { +DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) { + if (Elements.empty()) { Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); - return DIArray(MDNode::get(VMContext, &Null, 1)); + return DIArray(MDNode::get(VMContext, Null)); } - return DIArray(MDNode::get(VMContext, Elements, NumElements)); + return DIArray(MDNode::get(VMContext, Elements)); } /// getOrCreateSubrange - Create a descriptor for a value range. This @@ -527,7 +558,7 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { ConstantInt::get(Type::getInt64Ty(VMContext), Hi) }; - return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); + return DISubrange(MDNode::get(VMContext, Elts)); } /// createGlobalVariable - Create a new descriptor for the specified global. @@ -548,7 +579,7 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ Val }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(Node); @@ -575,7 +606,7 @@ createStaticVariable(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ Val }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(Node); @@ -597,7 +628,7 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, Ty, ConstantInt::get(Type::getInt32Ty(VMContext), Flags) }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); if (AlwaysPreserve) { // The optimizer may remove local variable. If there is an interest // to preserve variable info in such situation then stash it in a @@ -620,8 +651,8 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile F, unsigned LineNo, - DIType Ty, Value *const *Addr, - unsigned NumAddr, unsigned ArgNo) { + DIType Ty, ArrayRef<Value *> Addr, + unsigned ArgNo) { SmallVector<Value *, 15> Elts; Elts.push_back(GetTagConstant(VMContext, Tag)); Elts.push_back(Scope); @@ -629,9 +660,10 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, Elts.push_back(F); Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24)))); Elts.push_back(Ty); - Elts.append(Addr, Addr+NumAddr); + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); + Elts.append(Addr.begin(), Addr.end()); - return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size())); + return DIVariable(MDNode::get(VMContext, Elts)); } /// createFunction - Create a new descriptor for the specified function. 
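Most of the DIBuilder churn above is mechanical: MDNode::get, getTemporary, and getOrCreateArray now take an ArrayRef<Value*>, so call sites drop the `&Elts[0], array_lengthof(Elts)` boilerplate. The trick is an implicit conversion from C arrays; a minimal standalone imitation (ArrayRefLike is a hypothetical, stripped-down llvm::ArrayRef):

```cpp
#include <cstddef>

template <typename T> class ArrayRefLike {
  const T *Data;
  std::size_t Length;
public:
  // Implicit conversion from a C array captures the length automatically.
  template <std::size_t N>
  ArrayRefLike(const T (&Arr)[N]) : Data(Arr), Length(N) {}
  const T *begin() const { return Data; }
  const T *end() const { return Data + Length; }
  std::size_t size() const { return Length; }
  bool empty() const { return Length == 0; }
};

std::size_t countElts(ArrayRefLike<int> Elts) { return Elts.size(); }

int main() {
  int Elts[] = {1, 2, 3};
  return countElts(Elts) == 3 ? 0 : 1;  // array converts implicitly
}
```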
@@ -643,7 +675,8 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, bool isLocalToUnit, bool isDefinition, unsigned Flags, bool isOptimized, Function *Fn, - MDNode *TParams) { + MDNode *TParams, + MDNode *Decl) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -662,9 +695,10 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, - TParams + TParams, + Decl }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); @@ -706,7 +740,7 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Fn, TParam, }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); @@ -725,7 +759,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; - return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DINameSpace(MDNode::get(VMContext, Elts)); } DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, @@ -740,7 +774,7 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, File, ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) }; - return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DILexicalBlock(MDNode::get(VMContext, Elts)); } /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. @@ -751,7 +785,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo }; + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); } @@ -763,7 +797,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo }; + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; // If this block already has a terminator then insert this intrinsic // before the terminator. 
@@ -782,7 +816,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + Value *Args[] = { MDNode::get(V->getContext(), V), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), VarInfo }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); @@ -797,7 +831,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + Value *Args[] = { MDNode::get(V->getContext(), V), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), VarInfo }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 690c4b4..2e79eab 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -148,7 +148,7 @@ private: for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { CallSite CS(cast<Value>(II)); - if (CS && !isa<DbgInfoIntrinsic>(II)) { + if (CS && !isa<IntrinsicInst>(II)) { const Function *Callee = CS.getCalledFunction(); if (Callee) Node->addCalledFunction(CS, getOrInsertFunction(Callee)); diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 725ab72..659ffab 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -245,8 +245,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - CallSite CS(cast<Value>(I)); - if (!CS || isa<DbgInfoIntrinsic>(I)) continue; + CallSite CS(cast<Value>(I)); + if (!CS || isa<IntrinsicInst>(I)) continue; // If this call site already existed in the callgraph, just verify it // matches up to expectations and remove it from CallSites. diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index 06ae34c..dde2556 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -32,7 +32,7 @@ INITIALIZE_PASS(FindUsedTypes, "print-used-types", void FindUsedTypes::IncorporateType(const Type *Ty) { // If ty doesn't already exist in the used types map, add it now, otherwise // return. - if (!UsedTypes.insert(Ty).second) return; // Already contain Ty. + if (!UsedTypes.insert(Ty)) return; // Already contain Ty. // Make sure to add any types this type references now. // @@ -94,7 +94,7 @@ bool FindUsedTypes::runOnModule(Module &m) { // void FindUsedTypes::print(raw_ostream &OS, const Module *M) const { OS << "Types in use by this module:\n"; - for (std::set<const Type *>::const_iterator I = UsedTypes.begin(), + for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(), E = UsedTypes.end(); I != E; ++I) { OS << " "; WriteTypeSymbolic(OS, *I, M); diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 116aaf4..b226d66 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -602,7 +602,7 @@ void GlobalsModRef::addEscapingUse(Use &U) { // For the purposes of this analysis, it is conservatively correct to treat // a newly escaping value equivalently to a deleted one. 
We could perhaps // be more precise by processing the new use and attempting to update our - // saved analysis results to accomodate it. + // saved analysis results to accommodate it. deleteValue(U); AliasAnalysis::addEscapingUse(U); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 2cda791..a0c42f0 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetData.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/STLExtras.h" @@ -38,6 +39,15 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(IVUsers, "iv-users", "Induction Variable Users", false, true) +// IVUsers behavior currently depends on this temporary indvars mode. The +// option must be defined upstream from its uses. +namespace llvm { + bool DisableIVRewrite = false; +} +cl::opt<bool, true> DisableIVRewriteOpt( + "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite), + cl::desc("Disable canonical induction variable rewriting")); + Pass *llvm::createIVUsersPass() { return new IVUsers(); } @@ -79,7 +89,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, /// AddUsersIfInteresting - Inspect the specified instruction. If it is a /// reducible SCEV, recursively add its users to the IVUsesByStride set and /// return true. Otherwise, return false. -bool IVUsers::AddUsersIfInteresting(Instruction *I) { +bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) { if (!SE->isSCEVable(I->getType())) return false; // Void and FP expressions cannot be reduced. @@ -90,6 +100,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (Width > 64 || (TD && !TD->isLegalInteger(Width))) return false; + // We expect Sign/Zero extension to be eliminated from the IR before analyzing + // any downstream uses. + if (DisableIVRewrite && (isa<SExtInst>(I) || isa<ZExtInst>(I))) + return false; + if (!Processed.insert(I)) return true; // Instruction already handled. @@ -121,13 +136,13 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { bool AddUserToIVUsers = false; if (LI->getLoopFor(User->getParent()) != L) { if (isa<PHINode>(User) || Processed.count(User) || - !AddUsersIfInteresting(User)) { + !AddUsersIfInteresting(User, Phi)) { DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; } } else if (Processed.count(User) || - !AddUsersIfInteresting(User)) { + !AddUsersIfInteresting(User, Phi)) { DEBUG(dbgs() << "FOUND USER: " << *User << '\n' << " OF SCEV: " << *ISE << '\n'); AddUserToIVUsers = true; @@ -135,9 +150,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (AddUserToIVUsers) { // Okay, we found a user that we cannot reduce. - IVUses.push_back(new IVStrideUse(this, User, I)); + IVUses.push_back(new IVStrideUse(this, User, I, Phi)); IVStrideUse &NewUse = IVUses.back(); - // Transform the expression into a normalized form. + // Autodetect the post-inc loop set, populating NewUse.PostIncLoops. + // The regular return value here is discarded; instead of recording + // it, we just recompute it when we need it. 
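The -disable-iv-rewrite flag added to IVUsers.cpp above uses cl::opt's external-storage form: the second template argument `true` plus cl::location make the parser write into a plain global rather than into the option object, which is why the comment can say the option "must be defined upstream from its uses" — other code reads llvm::DisableIVRewrite directly. The same lines, isolated with fully qualified names:

```cpp
#include "llvm/Support/CommandLine.h"

namespace llvm {
  bool DisableIVRewrite = false;   // the out-of-line storage
}

// ExternalStorage = true: parsed value lands in llvm::DisableIVRewrite.
static llvm::cl::opt<bool, true> DisableIVRewriteOpt(
    "disable-iv-rewrite", llvm::cl::Hidden,
    llvm::cl::location(llvm::DisableIVRewrite),
    llvm::cl::desc("Disable canonical induction variable rewriting"));
```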
ISE = TransformForPostIncUse(NormalizeAutodetect, ISE, User, I, NewUse.PostIncLoops, @@ -148,8 +165,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return true; } -IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) { - IVUses.push_back(new IVStrideUse(this, User, Operand)); +IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand, PHINode *Phi) { + IVUses.push_back(new IVStrideUse(this, User, Operand, Phi)); return IVUses.back(); } @@ -177,7 +194,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) - (void)AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(I, cast<PHINode>(I)); return false; } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 47f91cf..efde598 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -66,21 +66,13 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ImmutableCallSite CS(cast<Instruction>(II)); - // If this function contains a call to setjmp or _setjmp, never inline - // it. This is a hack because we depend on the user marking their local - // variables as volatile if they are live across a setjmp call, and they - // probably won't do this in callers. if (const Function *F = CS.getCalledFunction()) { // If a function is both internal and has a single use, then it is // extremely likely to get inlined in the future (it was probably // exposed by an interleaved devirtualization pass). if (F->hasInternalLinkage() && F->hasOneUse()) ++NumInlineCandidates; - - if (F->isDeclaration() && - (F->getName() == "setjmp" || F->getName() == "_setjmp")) - callsSetJmp = true; - + // If this call is to function itself, then the function is recursive. // Inlining it into other functions is a bad idea, because this is // basically just a form of loop peeling, and our metrics aren't useful @@ -226,6 +218,13 @@ unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { /// analyzeFunction - Fill in the current structure with information gleaned /// from the specified function. void CodeMetrics::analyzeFunction(Function *F) { + // If this function contains a call to setjmp or _setjmp, never inline + // it. This is a hack because we depend on the user marking their local + // variables as volatile if they are live across a setjmp call, and they + // probably won't do this in callers. + if (F->callsFunctionThatReturnsTwice()) + callsSetJmp = true; + // Look at the size of the callee. for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) analyzeBasicBlock(&*BB); @@ -501,7 +500,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, return InlineCost::getAlways(); if (CalleeFI->Metrics.usesDynamicAlloca) { - // Get infomation about the caller. + // Get information about the caller. FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; // If we haven't calculated this information yet, do so now. @@ -549,7 +548,7 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, int Cost = 0; - // Look at the orginal size of the callee. Each instruction counts as 5. + // Look at the original size of the callee. Each instruction counts as 5. 
Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; // Offset that with the amount of code that can be constant-folded @@ -594,7 +593,7 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) { CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics; // For small functions we prefer to recalculate the cost for better accuracy. - if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) { + if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) { resetCachedCostInfo(Caller); return; } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 9dd5f05..9d78f8b 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -18,6 +18,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" +#include "llvm/Operator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ConstantFolding.h" @@ -900,6 +901,109 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit); } +/// SimplifyRem - Given operands for an SRem or URem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast<Constant>(Op0)) { + if (Constant *C1 = dyn_cast<Constant>(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + } + } + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // undef % X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // 0 % X -> 0, we don't need to preserve faults! + if (match(Op0, m_Zero())) + return Op0; + + // X % 0 -> undef, we don't need to preserve faults! + if (match(Op1, m_Zero())) + return UndefValue::get(Op0->getType()); + + // X % 1 -> 0 + if (match(Op1, m_One())) + return Constant::getNullValue(Op0->getType()); + + if (Op0->getType()->isIntegerTy(1)) + // It can't be remainder by zero, hence it must be remainder by one. + return Constant::getNullValue(Op0->getType()); + + // X % X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa<PHINode>(Op0) || isa<PHINode>(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifySRemInst - Given operands for an SRem, see if we can +/// fold the result. If not, this returns null. 
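SimplifyRem, added above, is essentially a table of algebraic identities applied before any select/PHI threading. A toy model that makes the undef cases explicit (Val and RemResult are invented stand-ins for LLVM's Value and the folded constants):

```cpp
struct Val {
  bool IsUndef;
  bool IsZero;
  bool IsOne;
  int Id;          // equal non-zero Ids model "the same SSA value"
};

enum RemResult { RemUndef, RemZero, RemUnknown };

RemResult simplifyRem(const Val &Op0, const Val &Op1) {
  if (Op1.IsUndef) return RemUndef;    // X % undef -> undef
  if (Op0.IsUndef) return RemZero;     // undef % X -> 0
  if (Op0.IsZero)  return RemZero;     // 0 % X -> 0, faults not preserved
  if (Op1.IsZero)  return RemUndef;    // X % 0 -> undef, faults not preserved
  if (Op1.IsOne)   return RemZero;     // X % 1 -> 0
  if (Op0.Id != 0 && Op0.Id == Op1.Id)
    return RemZero;                    // X % X -> 0
  return RemUnknown;                   // fall through to select/PHI threading
}
```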
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyURemInst - Given operands for a URem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, + const DominatorTree *, unsigned) { + // undef % X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + return 0; +} + +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit); +} + /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, @@ -1272,6 +1376,26 @@ static const Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } +/// ExtractEquivalentCondition - Rummage around inside V looking for something +/// equivalent to the comparison "LHS Pred RHS". Return such a value if found, +/// otherwise return null. Helper function for analyzing max/min idioms. +static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, + Value *LHS, Value *RHS) { + SelectInst *SI = dyn_cast<SelectInst>(V); + if (!SI) + return 0; + CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); + if (!Cmp) + return 0; + Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); + if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS) + return Cmp; + if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && + LHS == CmpRHS && RHS == CmpLHS) + return Cmp; + return 0; +} + /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -1354,46 +1478,48 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, default: assert(false && "Unknown ICmp predicate!"); case ICmpInst::ICMP_ULT: - return ConstantInt::getFalse(LHS->getContext()); + // getNullValue also works for vectors, unlike getFalse. + return Constant::getNullValue(ITy); case ICmpInst::ICMP_UGE: - return ConstantInt::getTrue(LHS->getContext()); + // getAllOnesValue also works for vectors, unlike getTrue. 
+ return ConstantInt::getAllOnesValue(ITy); case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULE: if (isKnownNonZero(LHS, TD)) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); break; case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: if (isKnownNonZero(LHS, TD)) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); break; case ICmpInst::ICMP_SLT: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); if (LHSKnownNonNegative) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); break; case ICmpInst::ICMP_SLE: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); break; case ICmpInst::ICMP_SGE: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); if (LHSKnownNonNegative) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); break; case ICmpInst::ICMP_SGT: ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD); if (LHSKnownNegative) - return ConstantInt::getFalse(LHS->getContext()); + return Constant::getNullValue(ITy); if (LHSKnownNonNegative && isKnownNonZero(LHS, TD)) - return ConstantInt::getTrue(LHS->getContext()); + return ConstantInt::getAllOnesValue(ITy); break; } } @@ -1685,7 +1811,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - return ConstantInt::getFalse(RHS->getContext()); + // getNullValue also works for vectors, unlike getFalse. + return Constant::getNullValue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); @@ -1695,7 +1822,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_NE: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - return ConstantInt::getTrue(RHS->getContext()); + // getAllOnesValue also works for vectors, unlike getTrue. + return Constant::getAllOnesValue(ITy); } } if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { @@ -1712,7 +1840,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_NE: case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: - return ConstantInt::getTrue(RHS->getContext()); + // getAllOnesValue also works for vectors, unlike getTrue. + return Constant::getAllOnesValue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); @@ -1722,7 +1851,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, case ICmpInst::ICMP_EQ: case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: - return ConstantInt::getFalse(RHS->getContext()); + // getNullValue also works for vectors, unlike getFalse. 
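// Rationale for the getNullValue/getAllOnesValue switch in this hunk
// (descriptive note): an icmp on vector operands produces e.g. <4 x i1>, and
// ConstantInt::getTrue/getFalse can only build a scalar i1. For <4 x i1>,
// Constant::getNullValue yields zeroinitializer (all lanes false) and
// getAllOnesValue yields <i1 true, i1 true, i1 true, i1 true> (all lanes
// true), so a single code path now covers scalar and vector compares.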
+ return Constant::getNullValue(ITy); } } @@ -1737,7 +1867,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // fall-through case Instruction::SDiv: case Instruction::AShr: - if (!LBO->isExact() && !RBO->isExact()) + if (!LBO->isExact() || !RBO->isExact()) break; if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), RBO->getOperand(0), TD, DT, MaxRecurse-1)) @@ -1758,6 +1888,194 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // Simplify comparisons involving max/min. + Value *A, *B; + CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE; + CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B". + + // Signed variants on "max(a,b)>=a -> true". + if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // smax(A, B) pred A. + EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". + // We analyze this as smax(A, B) pred A. + P = Pred; + } else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred smax(A, B). + EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B". + // We analyze this as smax(A, B) swapped-pred A. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && + (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // smin(A, B) pred A. + EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". + // We analyze this as smax(-A, -B) swapped-pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred smin(A, B). + EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B". + // We analyze this as smax(-A, -B) pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = Pred; + } + if (P != CmpInst::BAD_ICMP_PREDICATE) { + // Cases correspond to "max(A, B) p A". + switch (P) { + default: + break; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_SLE: + // Equivalent to "A EqP B". This may be the same as the condition tested + // in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + return V; + // Otherwise, see if "A EqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + case CmpInst::ICMP_NE: + case CmpInst::ICMP_SGT: { + CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); + // Equivalent to "A InvEqP B". This may be the same as the condition + // tested in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + return V; + // Otherwise, see if "A InvEqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + } + case CmpInst::ICMP_SGE: + // Always true. + return Constant::getAllOnesValue(ITy); + case CmpInst::ICMP_SLT: + // Always false. + return Constant::getNullValue(ITy); + } + } + + // Unsigned variants on "max(a,b)>=a -> true". + P = CmpInst::BAD_ICMP_PREDICATE; + if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // umax(A, B) pred A. 
+ EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". + // We analyze this as umax(A, B) pred A. + P = Pred; + } else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred umax(A, B). + EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B". + // We analyze this as umax(A, B) swapped-pred A. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && + (A == RHS || B == RHS)) { + if (A != RHS) std::swap(A, B); // umin(A, B) pred A. + EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". + // We analyze this as umax(-A, -B) swapped-pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = CmpInst::getSwappedPredicate(Pred); + } else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) && + (A == LHS || B == LHS)) { + if (A != LHS) std::swap(A, B); // A pred umin(A, B). + EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B". + // We analyze this as umax(-A, -B) pred -A. + // Note that we do not need to actually form -A or -B thanks to EqP. + P = Pred; + } + if (P != CmpInst::BAD_ICMP_PREDICATE) { + // Cases correspond to "max(A, B) p A". + switch (P) { + default: + break; + case CmpInst::ICMP_EQ: + case CmpInst::ICMP_ULE: + // Equivalent to "A EqP B". This may be the same as the condition tested + // in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B)) + return V; + // Otherwise, see if "A EqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + case CmpInst::ICMP_NE: + case CmpInst::ICMP_UGT: { + CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP); + // Equivalent to "A InvEqP B". This may be the same as the condition + // tested in the max/min; if so, we can just return that. + if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B)) + return V; + if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B)) + return V; + // Otherwise, see if "A InvEqP B" simplifies. + if (MaxRecurse) + if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1)) + return V; + break; + } + case CmpInst::ICMP_UGE: + // Always true. + return Constant::getAllOnesValue(ITy); + case CmpInst::ICMP_ULT: + // Always false. + return Constant::getNullValue(ITy); + } + } + + // Variants on "max(x,y) >= min(x,z)". + Value *C, *D; + if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && + match(RHS, m_SMin(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // max(x, ?) pred min(x, ?). + if (Pred == CmpInst::ICMP_SGE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_SLT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) && + match(RHS, m_SMax(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // min(x, ?) pred max(x, ?). + if (Pred == CmpInst::ICMP_SLE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_SGT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && + match(RHS, m_UMin(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // max(x, ?) pred min(x, ?). + if (Pred == CmpInst::ICMP_UGE) + // Always true. 
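// Hand-made instance of the unsigned max/min rules above (not from the
// patch): with %m = select (icmp ugt %a, %b), %a, %b, i.e. umax(%a, %b):
//   icmp uge %m, %a  -->  true             ; umax(a,b) uge a always holds
//   icmp ult %m, %a  -->  false
//   icmp eq  %m, %a  -->  icmp uge %a, %b  ; EqP: "a == umax(a,b)" iff "a uge b"
//   icmp ne  %m, %a  -->  icmp ult %a, %b  ; the inverse predicate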
+ return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_ULT) + // Always false. + return Constant::getNullValue(ITy); + } else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) && + match(RHS, m_UMax(m_Value(C), m_Value(D))) && + (A == C || A == D || B == C || B == D)) { + // min(x, ?) pred max(x, ?). + if (Pred == CmpInst::ICMP_ULE) + // Always true. + return Constant::getAllOnesValue(ITy); + if (Pred == CmpInst::ICMP_UGT) + // Always false. + return Constant::getNullValue(ITy); + } + // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) @@ -1993,6 +2311,9 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, TD, DT, MaxRecurse); @@ -2087,6 +2408,15 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); break; + case Instruction::SRem: + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::URem: + Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::FRem: + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast<BinaryOperator>(I)->hasNoSignedWrap(), diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 9e7da6c..6e27597 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -29,7 +29,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include <map> -#include <set> #include <stack> using namespace llvm; @@ -268,6 +267,8 @@ public: } // end anonymous namespace. namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) + LLVM_ATTRIBUTE_USED; raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { if (Val.isUndefined()) return OS << "undefined"; @@ -588,16 +589,18 @@ static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) { } if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { if (MI->isVolatile()) return false; - if (MI->getAddressSpace() != 0) return false; // FIXME: check whether it has a valuerange that excludes zero? 
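// Descriptive note on the address-space guards in this
// InstructionDereferencesPointer hunk: a memory intrinsic with a nonzero
// length actually dereferences its pointer operands, and in address space 0 a
// dereferenced pointer cannot be null. Other address spaces may legally map
// address zero, hence the new getDestAddressSpace() == 0 and
// getSourceAddressSpace() == 0 checks.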
ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength()); if (!Len || Len->isZero()) return false; - if (MI->getRawDest() == Ptr || MI->getDest() == Ptr) - return true; + if (MI->getDestAddressSpace() == 0) + if (MI->getRawDest() == Ptr || MI->getDest() == Ptr) + return true; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) - return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr; + if (MTI->getSourceAddressSpace() == 0) + if (MTI->getRawSource() == Ptr || MTI->getSource() == Ptr) + return true; } return false; } diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index fc7edc0..f130f30 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -606,7 +606,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, Type::getInt64Ty(V->getContext()))) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { - const SmallVector<unsigned, 4> &Indices = CE->getIndices(); + ArrayRef<unsigned> Indices = CE->getIndices(); if (Value *W = FindInsertedValue(CE->getOperand(0), Indices.begin(), Indices.end())) diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 2ea27fb..c5c676b 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -17,6 +17,7 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" using namespace llvm; /// AreEquivalentAddressValues - Test if A and B will obviously have the same @@ -30,7 +31,7 @@ using namespace llvm; static bool AreEquivalentAddressValues(const Value *A, const Value *B) { // Test if the values are trivially equivalent. if (A == B) return true; - + // Test if the values come from identical arithmetic instructions. // Use isIdenticalToWhenDefined instead of isIdenticalTo because // this function is only used when one address use dominates the @@ -41,7 +42,7 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { if (const Instruction *BI = dyn_cast<Instruction>(B)) if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI)) return true; - + // Otherwise they may not be equivalent. return false; } diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 64d215c..2283db0 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -79,8 +79,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { MemDepResult Res = MDA.getDependency(Inst); if (!Res.isNonLocal()) { - assert(Res.isClobber() != Res.isDef() && - "Local dep should be def or clobber!"); + assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && + "Local dep should be unknown, def or clobber!"); Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(), Res.isClobber()), static_cast<BasicBlock *>(0))); @@ -92,8 +92,9 @@ bool MemDepPrinter::runOnFunction(Function &F) { for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator I = NLDI.begin(), E = NLDI.end(); I != E; ++I) { const MemDepResult &Res = I->getResult(); - assert(Res.isClobber() != Res.isDef() && - "Resolved non-local call dep should be def or clobber!"); + assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) && + "Resolved non-local call dep should be unknown, def or " + "clobber!"); InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(), Res.isClobber()), I->getBB())); @@ -148,16 +149,24 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { bool isClobber = I->first.getInt(); const BasicBlock *DepBB = I->second; - OS << " " << (isClobber ? 
"Clobber" : " Def"); + OS << " "; + if (!DepInst) + OS << "Unknown"; + else if (isClobber) + OS << "Clobber"; + else + OS << " Def"; if (DepBB) { OS << " in block "; WriteAsOperand(OS, DepBB, /*PrintType=*/false, M); } - OS << " from: "; - if (DepInst == Inst) - OS << "<unspecified>"; - else - DepInst->print(OS); + if (DepInst) { + OS << " from: "; + if (DepInst == Inst) + OS << "<unspecified>"; + else + DepInst->print(OS); + } OS << "\n"; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 35043bdd..bba4482 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Function.h" @@ -46,6 +47,11 @@ STATISTIC(NumUncacheNonLocalPtr, STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); +// Limit for the number of instructions to scan in a block. +// FIXME: Figure out what a sane value is for this. +// (500 is relatively insane.) +static const int BlockScanLimit = 500; + char MemoryDependenceAnalysis::ID = 0; // Register this pass... @@ -179,8 +185,16 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, MemDepResult MemoryDependenceAnalysis:: getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, BasicBlock::iterator ScanIt, BasicBlock *BB) { + unsigned Limit = BlockScanLimit; + // Walk backwards through the block, looking for dependencies while (ScanIt != BB->begin()) { + // Limit the amount of scanning we do so we don't end up with quadratic + // running time on extreme testcases. + --Limit; + if (!Limit) + return MemDepResult::getUnknown(); + Instruction *Inst = --ScanIt; // If this inst is a memory op, get the pointer it accessed @@ -214,11 +228,101 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, } } - // No dependence found. If this is the entry block of the function, it is a - // clobber, otherwise it is non-local. + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) return MemDepResult::getNonLocal(); - return MemDepResult::getClobber(ScanIt); + return MemDepResult::getUnknown(); +} + +/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that +/// would fully overlap MemLoc if done as a wider legal integer load. +/// +/// MemLocBase, MemLocOffset are lazily computed here the first time the +/// base/offs of memloc is needed. +static bool +isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, + const Value *&MemLocBase, + int64_t &MemLocOffs, + const LoadInst *LI, + const TargetData *TD) { + // If we have no target data, we can't do this. + if (TD == 0) return false; + + // If we haven't already computed the base/offset of MemLoc, do so now. + if (MemLocBase == 0) + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, + LI, *TD); + return Size != 0; +} + +/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that +/// looks at a memory location for a load (specified by MemLocBase, Offs, +/// and Size) and compares it against a load. 
If the specified load could +/// be safely widened to a larger integer load that is 1) still efficient, +/// 2) safe for the target, and 3) would provide the specified memory +/// location value, then this function returns the size in bytes of the +/// load width to use. If not, this returns zero. +unsigned MemoryDependenceAnalysis:: +getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, + unsigned MemLocSize, const LoadInst *LI, + const TargetData &TD) { + // We can only extend non-volatile integer loads. + if (!isa<IntegerType>(LI->getType()) || LI->isVolatile()) return 0; + + // Get the base of this load. + int64_t LIOffs = 0; + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); + + // If the two pointers are not based on the same pointer, we can't tell that + // they are related. + if (LIBase != MemLocBase) return 0; + + // Okay, the two values are based on the same pointer, but returned as + // no-alias. This happens when we have things like two byte loads at "P+1" + // and "P+3". Check to see if increasing the size of the "LI" load up to its + // alignment (or the largest native integer type) will allow us to load all + // the bits required by MemLoc. + + // If MemLoc is before LI, then no widening of LI will help us out. + if (MemLocOffs < LIOffs) return 0; + + // Get the alignment of the load in bytes. We assume that it is safe to load + // any legal integer up to this size without a problem. For example, if we're + // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can + // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it + // to i16. + unsigned LoadAlign = LI->getAlignment(); + + int64_t MemLocEnd = MemLocOffs+MemLocSize; + + // If no amount of rounding up will let MemLoc fit into LI, then bail out. + if (LIOffs+LoadAlign < MemLocEnd) return 0; + + // This is the size of the load to try. Start with the next larger power of + // two. + unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; + NewLoadByteSize = NextPowerOf2(NewLoadByteSize); + + while (1) { + // If this load size is bigger than our known alignment or would not fit + // into a native integer register, then we fail. + if (NewLoadByteSize > LoadAlign || + !TD.fitsInLegalInteger(NewLoadByteSize*8)) + return 0; + + // If a load of this width would include all of MemLoc, then we succeed. + if (LIOffs+NewLoadByteSize >= MemLocEnd) + return NewLoadByteSize; + + NewLoadByteSize <<= 1; + } + + return 0; } /// getPointerDependencyFrom - Return the instruction on which a memory @@ -229,58 +333,39 @@ MemDepResult MemoryDependenceAnalysis:: getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB) { - Value *InvariantTag = 0; + const Value *MemLocBase = 0; + int64_t MemLocOffset = 0; + + unsigned Limit = BlockScanLimit; // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { + // Limit the amount of scanning we do so we don't end up with quadratic + // running time on extreme testcases. + --Limit; + if (!Limit) + return MemDepResult::getUnknown(); + Instruction *Inst = --ScanIt; - // If we're in an invariant region, no dependencies can be found before - // we pass an invariant-begin marker. - if (InvariantTag == Inst) { - InvariantTag = 0; - continue; - } - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Debug intrinsics don't (and can't) cause dependences. 
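// Worked example for getLoadLoadClobberFullWidthSize above, with made-up
// numbers: LI is "load i8* %P, align 4" at offset 0 from the base, and the
// queried location is the single byte at P+3 (MemLocOffs = 3, MemLocEnd = 4),
// on an x86-32 target:
//   NewLoadByteSize = NextPowerOf2(1) = 2
//   try 2 bytes: 2 <= align 4 and i16 is legal, but 0+2 < 4  -> double it
//   try 4 bytes: 4 <= align 4 and i32 is legal, and 0+4 >= 4 -> return 4
// so the over-aligned i8 load may be widened to an i32 load covering P+3.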
if (isa<DbgInfoIntrinsic>(II)) continue; - // If we pass an invariant-end marker, then we've just entered an - // invariant region and can start ignoring dependencies. - if (II->getIntrinsicID() == Intrinsic::invariant_end) { - // FIXME: This only considers queries directly on the invariant-tagged - // pointer, not on query pointers that are indexed off of them. It'd - // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc); - if (R == AliasAnalysis::MustAlias) - InvariantTag = II->getArgOperand(0); - - continue; - } - // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd - // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc); - if (R == AliasAnalysis::MustAlias) + // be nice to handle that at some point (the right approach is to use + // GetPointerBaseWithConstantOffset). + if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)), + MemLoc)) return MemDepResult::getDef(II); continue; } } - // If we're querying on a load and we're in an invariant region, we're done - // at this point. Nothing a load depends on can live in an invariant region. - // - // FIXME: this will prevent us from returning load/load must-aliases, so GVN - // won't remove redundant loads. - if (isLoad && InvariantTag) continue; - // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { @@ -288,27 +373,57 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - if (R == AliasAnalysis::NoAlias) - continue; - // May-alias loads don't depend on each other without a dependence. - if (isLoad && R != AliasAnalysis::MustAlias) + if (isLoad) { + if (R == AliasAnalysis::NoAlias) { + // If this is an over-aligned integer load (for example, + // "load i8* %P, align 4") see if it would obviously overlap with the + // queried location if widened to a larger load (e.g. if the queried + // location is 1 byte at P+1). If so, return it as a load/load + // clobber result, allowing the client to decide to widen the load if + // it wants to. + if (const IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) + if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && + isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, + MemLocOffset, LI, TD)) + return MemDepResult::getClobber(Inst); + + continue; + } + + // Must aliased loads are defs of each other. + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + +#if 0 // FIXME: Temporarily disabled. GVN is cleverly rewriting loads + // in terms of clobbering loads, but since it does this by looking + // at the clobbering load directly, it doesn't know about any + // phi translation that may have happened along the way. + + // If we have a partial alias, then return this as a clobber for the + // client to handle. 
+ if (R == AliasAnalysis::PartialAlias) + return MemDepResult::getClobber(Inst); +#endif + + // Random may-alias loads don't depend on each other without a + // dependence. + continue; + } + + // Stores don't depend on other no-aliased accesses. + if (R == AliasAnalysis::NoAlias) continue; // Stores don't alias loads from read-only memory. - if (!isLoad && AA->pointsToConstantMemory(LoadLoc)) + if (AA->pointsToConstantMemory(LoadLoc)) continue; - // Stores depend on may and must aliased loads, loads depend on must-alias - // loads. + // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // There can't be stores to the value we care about inside an - // invariant region. - if (InvariantTag) continue; - // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. @@ -341,8 +456,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, (isa<CallInst>(Inst) && extractMallocCall(Inst))) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - if (AccessPtr == Inst || - AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias) + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); continue; } @@ -353,9 +467,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // If the call has no effect on the queried pointer, just ignore it. continue; case AliasAnalysis::Mod: - // If we're in an invariant region, we can ignore calls that ONLY - // modify the pointer. - if (InvariantTag) continue; return MemDepResult::getClobber(Inst); case AliasAnalysis::Ref: // If the call is known to never store to the pointer, and if this is a @@ -368,11 +479,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } } - // No dependence found. If this is the entry block of the function, it is a - // clobber, otherwise it is non-local. + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) return MemDepResult::getNonLocal(); - return MemDepResult::getClobber(ScanIt); + return MemDepResult::getUnknown(); } /// getDependency - Return the instruction on which a memory operation @@ -400,12 +511,12 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { - // No dependence found. If this is the entry block of the function, it is a - // clobber, otherwise it is non-local. + // No dependence found. If this is the entry block of the function, it is + // unknown, otherwise it is non-local. if (QueryParent != &QueryParent->getParent()->getEntryBlock()) LocalCache = MemDepResult::getNonLocal(); else - LocalCache = MemDepResult::getClobber(QueryInst); + LocalCache = MemDepResult::getUnknown(); } else { AliasAnalysis::Location MemLoc; AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); @@ -413,7 +524,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // If we can do a pointer scan, make it happen. 
bool isLoad = !(MR & AliasAnalysis::Mod); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst)) - isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end; + isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, QueryParent); @@ -424,7 +535,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { QueryParent); } else // Non-memory instruction. - LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); + LocalCache = MemDepResult::getUnknown(); } // Remember the result! @@ -558,10 +669,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { // No dependence found. If this is the entry block of the function, it is - // a clobber, otherwise it is non-local. + // a clobber, otherwise it is unknown. Dep = MemDepResult::getNonLocal(); } else { - Dep = MemDepResult::getClobber(ScanPos); + Dep = MemDepResult::getUnknown(); } // If we had a dirty entry for the block, update it. Otherwise, just add @@ -617,7 +728,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, return; Result.clear(); Result.push_back(NonLocalDepResult(FromBB, - MemDepResult::getClobber(FromBB->begin()), + MemDepResult::getUnknown(), const_cast<Value *>(Loc.Ptr))); } @@ -679,7 +790,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! - if (Dep.isNonLocal()) + if (Dep.isNonLocal() || Dep.isUnknown()) return Dep; // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently @@ -853,6 +964,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVector<BasicBlock*, 32> Worklist; Worklist.push_back(StartBB); + // PredList used inside loop. + SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; + // Keep track of the entries that we know are sorted. Previously cached // entries will all be sorted. The entries we add we only sort on demand (we // don't insert every element into its sorted position). We know that we @@ -889,22 +1003,29 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // the same Pointer. if (!Pointer.NeedsPHITranslationFromBlock(BB)) { SkipFirstBlock = false; + SmallVector<BasicBlock*, 16> NewBlocks; for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { // Verify that we haven't looked at this block yet. std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool> InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); if (InsertRes.second) { // First time we've looked at *PI. - Worklist.push_back(*PI); + NewBlocks.push_back(*PI); continue; } // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. - if (InsertRes.first->second != Pointer.getAddr()) + if (InsertRes.first->second != Pointer.getAddr()) { + // Make sure to clean up the Visited map before continuing on to + // PredTranslationFailure. 
+ for (unsigned i = 0; i < NewBlocks.size(); i++) + Visited.erase(NewBlocks[i]); goto PredTranslationFailure; + } } + Worklist.append(NewBlocks.begin(), NewBlocks.end()); continue; } @@ -923,13 +1044,15 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NumSortedEntries = Cache->size(); } Cache = 0; - + + PredList.clear(); for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { BasicBlock *Pred = *PI; - + PredList.push_back(std::make_pair(Pred, Pointer)); + // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. - PHITransAddr PredPointer(Pointer); + PHITransAddr &PredPointer = PredList.back().second; PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); @@ -943,6 +1066,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); if (!InsertRes.second) { + // We found the pred; take it off the list of preds to visit. + PredList.pop_back(); + // If the predecessor was visited with PredPtr, then we already did // the analysis and can ignore it. if (InsertRes.first->second == PredPtrVal) continue; @@ -951,18 +1077,49 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Otherwise, the block was previously analyzed with a different // pointer. We can't represent the result of this case, so we just // treat this as a phi translation failure. + + // Make sure to clean up the Visited map before continuing on to + // PredTranslationFailure. + for (unsigned i = 0; i < PredList.size(); i++) + Visited.erase(PredList[i].first); + goto PredTranslationFailure; } - + } + + // Actually process results here; this needs to be a separate loop to avoid + // calling getNonLocalPointerDepFromBB for blocks we don't want to return + // any results for. (getNonLocalPointerDepFromBB will modify our + // datastructures in ways the code after the PredTranslationFailure label + // doesn't expect.) + for (unsigned i = 0; i < PredList.size(); i++) { + BasicBlock *Pred = PredList[i].first; + PHITransAddr &PredPointer = PredList[i].second; + Value *PredPtrVal = PredPointer.getAddr(); + + bool CanTranslate = true; // If PHI translation was unable to find an available pointer in this // predecessor, then we have to assume that the pointer is clobbered in // that predecessor. We can still do PRE of the load, which would insert // a computation of the pointer in this predecessor. - if (PredPtrVal == 0) { + if (PredPtrVal == 0) + CanTranslate = false; + + // FIXME: it is entirely possible that PHI translating will end up with + // the same value. Consider PHI translating something like: + // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* + // to recurse here, pedantically speaking. + + // If getNonLocalPointerDepFromBB fails here, that means the cached + // result conflicted with the Visited list; we have to conservatively + // assume it is unknown, but this also does not block PRE of the load. + if (!CanTranslate || + getNonLocalPointerDepFromBB(PredPointer, + Loc.getWithNewPtr(PredPtrVal), + isLoad, Pred, + Result, Visited)) { // Add the entry to the Result list.
- NonLocalDepResult Entry(Pred, - MemDepResult::getClobber(Pred->getTerminator()), - PredPtrVal); + NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal); Result.push_back(Entry); // Since we had a phi translation failure, the cache for CacheKey won't @@ -974,19 +1131,6 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NLPI.Pair = BBSkipFirstBlockPair(); continue; } - - // FIXME: it is entirely possible that PHI translating will end up with - // the same value. Consider PHI translating something like: - // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* - // to recurse here, pedantically speaking. - - // If we have a problem phi translating, fall through to the code below - // to handle the failure condition. - if (getNonLocalPointerDepFromBB(PredPointer, - Loc.getWithNewPtr(PredPointer.getAddr()), - isLoad, Pred, - Result, Visited)) - goto PredTranslationFailure; } // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. @@ -1003,6 +1147,9 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, continue; PredTranslationFailure: + // The following code is "failure"; we can't produce a sane translation + // for the given block. It assumes that we haven't modified any of + // our datastructures while processing the current block. if (Cache == 0) { // Refresh the CacheInfo/Cache pointer if it got invalidated. @@ -1017,8 +1164,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // results from the set". Clear out the indicator for this. CacheInfo->Pair = BBSkipFirstBlockPair(); - // If *nothing* works, mark the pointer as being clobbered by the first - // instruction in this block. + // If *nothing* works, mark the pointer as unknown. // // If this is the magic first block, return this as a clobber of the whole // incoming value. 
Since we can't phi translate to one of the predecessors, @@ -1033,8 +1179,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); - I->setResult(MemDepResult::getClobber(BB->begin())); - ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey); + I->setResult(MemDepResult::getUnknown()); Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Pointer.getAddr())); break; diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 93da5a4..70dcd0d 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index 5d3f6bb..7c584da 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -38,13 +38,10 @@ #include "llvm/Support/TypeBuilder.h" #include "llvm/Support/raw_ostream.h" -#include <map> #include <queue> -#include <set> #include <stack> #include <string> #include <utility> -#include <vector> #include <sstream> using namespace llvm; @@ -286,7 +283,7 @@ void BallLarusDag::calculatePathNumbers() { BallLarusEdge* exitEdge = addEdge(node, getExit(), 0); exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); - // Counters to handle the possibilty of a multi-graph + // Counters to handle the possibility of a multi-graph BasicBlock* oldTarget = 0; unsigned duplicateNumber = 0; diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index c549773..0ae734e 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -124,7 +124,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { ProfilePathEdgeVector* pev = currentPath->getPathEdges(); DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": " << currentPath->getCount() << "\n"); - // setup the entry edge (normally path profiling doens't care about this) + // setup the entry edge (normally path profiling doesn't care about this) if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] += currentPath->getCount(); diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index 667ee1c..b594e2b 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -140,7 +140,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { // loop, thus the edge is a backedge, continue and do not check if the // value is valid. if (BBisHeader && BBLoop->contains(*bbi)) { - printEdgeError(edge, "but is backedge, continueing"); + printEdgeError(edge, "but is backedge, continuing"); continue; } // If the edges value is missing (and this is no loop header, and this is diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 36f211e..173de2c 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -309,9 +309,9 @@ void ProfileInfoT<Function,BasicBlock>:: removeEdge(oldedge); } -/// Replaces all occurences of RmBB in the ProfilingInfo with DestBB. +/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB. 
/// This checks all edges of the function the blocks reside in and replaces the -/// occurences of RmBB with DestBB. +/// occurrences of RmBB with DestBB. template<> void ProfileInfoT<Function,BasicBlock>:: replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { @@ -812,7 +812,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { } if (iw < 0) continue; - // Check the recieving end of the path if it can handle the flow. + // Check the receiving end of the path if it can handle the flow. double ow = getExecutionCount(Dest); Processed.clear(); for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp index 25481b2..eaa38da 100644 --- a/lib/Analysis/ProfileInfoLoader.cpp +++ b/lib/Analysis/ProfileInfoLoader.cpp @@ -19,7 +19,6 @@ #include "llvm/Support/raw_ostream.h" #include <cstdio> #include <cstdlib> -#include <map> using namespace llvm; // ByteSwap - Byteswap 'Var' if 'Really' is true. diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 3269dcc..80eda79 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -249,7 +249,7 @@ void RegionPass::assignPassManager(PMStack &PMS, assert (!PMS.empty() && "Unable to create Region Pass Manager"); PMDataManager *PMD = PMS.top(); - // [1] Create new Call Graph Pass Manager + // [1] Create new Region Pass Manager RGPM = new RGPassManager(PMD->getDepth() + 1); RGPM->populateInheritedAnalysis(PMS); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 228974d..025718e 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -1035,6 +1035,93 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, return S; } +// Get the limit of a recurrence such that incrementing by Step cannot cause +// signed overflow as long as the value of the recurrence within the loop does +// not exceed this limit before incrementing. +static const SCEV *getOverflowLimitForStep(const SCEV *Step, + ICmpInst::Predicate *Pred, + ScalarEvolution *SE) { + unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); + if (SE->isKnownPositive(Step)) { + *Pred = ICmpInst::ICMP_SLT; + return SE->getConstant(APInt::getSignedMinValue(BitWidth) - + SE->getSignedRange(Step).getSignedMax()); + } + if (SE->isKnownNegative(Step)) { + *Pred = ICmpInst::ICMP_SGT; + return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - + SE->getSignedRange(Step).getSignedMin()); + } + return 0; +} + +// The recurrence AR has been shown to have no signed wrap. Typically, if we can +// prove NSW for AR, then we can just as easily prove NSW for its preincrement +// or postincrement sibling. This allows normalizing a sign extended AddRec as +// such: {sext(Step + Start),+,Step} => {Step + sext(Start),+,Step}. As a +// result, the expression "Step + sext(PreIncAR)" is congruent with +// "sext(PostIncAR)" +static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, + const Type *Ty, + ScalarEvolution *SE) { + const Loop *L = AR->getLoop(); + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*SE); + + // Check for a simple looking step prior to loop entry. + const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start); + if (!SA || SA->getNumOperands() != 2 || SA->getOperand(0) != Step) + return 0; + + // This is a postinc AR. Check for overflow on the preinc recurrence using the + // same three conditions that getSignExtendExpr checks.
+ + // 1. NSW flags on the step increment. + const SCEV *PreStart = SA->getOperand(1); + const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>( + SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); + + if (PreAR && PreAR->getNoWrapFlags(SCEV::FlagNSW)) + return PreStart; + + // 2. Direct overflow check on the step operation's expression. + unsigned BitWidth = SE->getTypeSizeInBits(AR->getType()); + const Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2); + const SCEV *OperandExtendedStart = + SE->getAddExpr(SE->getSignExtendExpr(PreStart, WideTy), + SE->getSignExtendExpr(Step, WideTy)); + if (SE->getSignExtendExpr(Start, WideTy) == OperandExtendedStart) { + // Cache knowledge of PreAR NSW. + if (PreAR) + const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(SCEV::FlagNSW); + // FIXME: this optimization needs a unit test + DEBUG(dbgs() << "SCEV: untested prestart overflow check\n"); + return PreStart; + } + + // 3. Loop precondition. + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, SE); + + if (OverflowLimit && + SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { + return PreStart; + } + return 0; +} + +// Get the normalized sign-extended expression for this AddRec's Start. +static const SCEV *getSignExtendAddRecStart(const SCEVAddRecExpr *AR, + const Type *Ty, + ScalarEvolution *SE) { + const SCEV *PreStart = getPreStartForSignExtend(AR, Ty, SE); + if (!PreStart) + return SE->getSignExtendExpr(AR->getStart(), Ty); + + return SE->getAddExpr(SE->getSignExtendExpr(AR->getStepRecurrence(*SE), Ty), + SE->getSignExtendExpr(PreStart, Ty)); +} + const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, const Type *Ty) { assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && @@ -1097,7 +1184,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. if (AR->getNoWrapFlags(SCEV::FlagNSW)) - return getAddRecExpr(getSignExtendExpr(Start, Ty), + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), getSignExtendExpr(Step, Ty), L, SCEV::FlagNSW); @@ -1133,7 +1220,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendExpr(Start, Ty), + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), getSignExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } @@ -1149,7 +1236,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // Cache knowledge of AR NSW, which is propagated to this AddRec. const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendExpr(Start, Ty), + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), getZeroExtendExpr(Step, Ty), L, AR->getNoWrapFlags()); } @@ -1159,34 +1246,18 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // the addrec is safe. Also, if the entry is guarded by a comparison // with the start value and the backedge is guarded by a comparison // with the post-inc value, the addrec is safe. 
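// Made-up example of the normalization enabled here: let AR = {x + 4,+,4}<L>,
// a post-increment recurrence whose pre-increment sibling is {x,+,4}. Once
// NSW is established for the sibling, getSignExtendAddRecStart computes the
// widened start as sext(4) + sext(x) instead of sext(x + 4), so
// sext({x,+,4}) and sext({x + 4,+,4}) share the base sext(x) and step
// sext(4), making "Step + sext(PreIncAR)" and "sext(PostIncAR)" visibly
// congruent to SCEV.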
- if (isKnownPositive(Step)) { - const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) - - getSignedRange(Step).getSignedMax()); - if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || - (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && - isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, - AR->getPostIncExpr(*this), N))) { - // Cache knowledge of AR NSW, which is propagated to this AddRec. - const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); - // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendExpr(Start, Ty), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); - } - } else if (isKnownNegative(Step)) { - const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - - getSignedRange(Step).getSignedMin()); - if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || - (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && - isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, - AR->getPostIncExpr(*this), N))) { - // Cache knowledge of AR NSW, which is propagated to this AddRec. - const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); - // Return the expression with the addrec on the outside. - return getAddRecExpr(getSignExtendExpr(Start, Ty), - getSignExtendExpr(Step, Ty), - L, AR->getNoWrapFlags()); - } + ICmpInst::Predicate Pred; + const SCEV *OverflowLimit = getOverflowLimitForStep(Step, &Pred, this); + if (OverflowLimit && + (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || + (isLoopEntryGuardedByCond(L, Pred, Start, OverflowLimit) && + isLoopBackedgeGuardedByCond(L, Pred, AR->getPostIncExpr(*this), + OverflowLimit)))) { + // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. + const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); + return getAddRecExpr(getSignExtendAddRecStart(AR, Ty, this), + getSignExtendExpr(Step, Ty), + L, AR->getNoWrapFlags()); } } } @@ -1882,7 +1953,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, // outer mul and the inner addrec are guaranteed to have no overflow. // // No self-wrap cannot be guaranteed after changing the step size, but - // will be infered if either NUW or NSW is true. + // will be inferred if either NUW or NSW is true. Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); @@ -2015,7 +2086,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. - if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) { + if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) { SmallVector<const SCEV *, 4> Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); @@ -3783,24 +3854,25 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // update the value. The temporary CouldNotCompute value tells SCEV // code elsewhere that it shouldn't attempt to request a new // backedge-taken count, which could result in infinite recursion. 
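// Numeric instance of getOverflowLimitForStep (hypothetical i8 values): for a
// step whose signed range has maximum 4, it sets Pred = ICMP_SLT and returns
// SignedMin(i8) - 4 = -128 - 4 = 124 (mod 2^8), i.e. the recurrence cannot
// sign-overflow while its pre-increment value satisfies "value < 124", which
// is exactly "value <= 123 = SignedMax(i8) - 4".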
- std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = + std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair = BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute())); if (!Pair.second) return Pair.first->second; - BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L); - if (BECount.Exact != getCouldNotCompute()) { - assert(isLoopInvariant(BECount.Exact, L) && - isLoopInvariant(BECount.Max, L) && + BackedgeTakenInfo Result = getCouldNotCompute(); + BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L); + if (Computed.Exact != getCouldNotCompute()) { + assert(isLoopInvariant(Computed.Exact, L) && + isLoopInvariant(Computed.Max, L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; // Update the value in the map. - Pair.first->second = BECount; + Result = Computed; } else { - if (BECount.Max != getCouldNotCompute()) + if (Computed.Max != getCouldNotCompute()) // Update the value in the map. - Pair.first->second = BECount; + Result = Computed; if (isa<PHINode>(L->getHeader()->begin())) // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; @@ -3811,7 +3883,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. - if (BECount.hasAnyInfo()) { + if (Computed.hasAnyInfo()) { SmallVector<Instruction *, 16> Worklist; PushLoopPHIs(L, Worklist); @@ -3842,7 +3914,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { PushDefUseChildren(I, Worklist); } } - return Pair.first->second; + + // Re-lookup the insert position, since the call to + // ComputeBackedgeTakenCount above could result in a + // recursive call to getBackedgeTakenInfo (on a different + // loop), which would invalidate the iterator computed + // earlier. + return BackedgeTakenCounts.find(L)->second = Result; } /// forgetLoop - This method should be called by the client when it has @@ -4426,7 +4504,7 @@ Constant * ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, const Loop *L) { - std::map<PHINode*, Constant*>::const_iterator I = + DenseMap<PHINode*, Constant*>::const_iterator I = ConstantEvolutionLoopExitValue.find(PN); if (I != ConstantEvolutionLoopExitValue.end()) return I->second; @@ -4694,9 +4772,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { for (++i; i != e; ++i) NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); - AddRec = cast<SCEVAddRecExpr>( + const SCEV *FoldedRec = getAddRecExpr(NewOps, AddRec->getLoop(), - AddRec->getNoWrapFlags(SCEV::FlagNW))); + AddRec->getNoWrapFlags(SCEV::FlagNW)); + AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec); + // The addrec may be folded to a nonrecurrence, for example, if the + // induction variable is multiplied by zero after constant folding. Go + // ahead and return the folded value. + if (!AddRec) + return FoldedRec; break; } diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 40e18ab..0faf139 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -31,7 +31,7 @@ // // The second field identifies the type's parent node in the tree, or // is null or omitted for a root node. A type is considered to alias -// all of its decendents and all of its ancestors in the tree.
Also, +// all of its descendants and all of its ancestors in the tree. Also, // a type is considered to alias all types in other trees, so that // bitcode produced from multiple front-ends is handled conservatively. // @@ -59,6 +59,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Constants.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Metadata.h" diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a8117e6..dab5aeb 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -131,8 +131,18 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } return; } + + if (Argument *A = dyn_cast<Argument>(V)) { + // Get alignment information off byval arguments if specified in the IR. + if (A->hasByValAttr()) + if (unsigned Align = A->getParamAlignment()) + KnownZero = Mask & APInt::getLowBitsSet(BitWidth, + CountTrailingZeros_32(Align)); + return; + } - KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Start out not knowing anything. + // Start out not knowing anything. + KnownZero.clearAllBits(); KnownOne.clearAllBits(); if (Depth == MaxDepth || Mask == 0) return; // Limit search depth. @@ -670,6 +680,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } + case Intrinsic::x86_sse42_crc32_64_8: + case Intrinsic::x86_sse42_crc32_64_64: + KnownZero = APInt::getHighBitsSet(64, 32); + break; } } break; @@ -1328,7 +1342,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, break; } } - // If we succesfully found a value for each of our subaggregates + // If we successfully found a value for each of our subaggregates if (To) return To; } @@ -1757,7 +1771,7 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) { } else { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast<Instruction>(V)) - // TODO: Aquire a DominatorTree and use it. + // TODO: Acquire a DominatorTree and use it. if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { V = Simplified; continue; |