40 files changed, 1361 insertions, 1057 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 54a7f67..fa007cf 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -493,7 +493,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but has modified arguments.
   const FunctionType *FTy = F->getFunctionType();
-  std::vector<const Type*> Params;
+  std::vector<Type*> Params;
 
   typedef std::set<IndicesVector> ScalarizeTable;
 
@@ -733,12 +733,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                           AttributesVec.end()));
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                         AttributesVec.end()));
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 179b150..3de7bfc 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_library(LLVMipo
   ArgumentPromotion.cpp
   ConstantMerge.cpp
   DeadArgumentElimination.cpp
-  DeadTypeElimination.cpp
   ExtractGV.cpp
   FunctionAttrs.cpp
   GlobalDCE.cpp
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d4eaf0c..1517765 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -208,7 +208,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
   // the old function, but doesn't have isVarArg set.
   const FunctionType *FTy = Fn.getFunctionType();
 
-  std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
+  std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
   FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
                                                 Params, false);
   unsigned NumArgs = Params.size();
@@ -244,11 +244,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(PAL);
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(PAL);
       if (cast<CallInst>(Call)->isTailCall())
@@ -647,7 +647,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   // Start by computing a new prototype for the function, which is the same as
   // the old function, but has fewer arguments and a different return type.
   const FunctionType *FTy = F->getFunctionType();
-  std::vector<const Type*> Params;
+  std::vector<Type*> Params;
 
   // Set up to build a new list of parameter attributes.
   SmallVector<AttributeWithIndex, 8> AttributesVec;
@@ -659,13 +659,13 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
 
   // Find out the new return value.
 
-  const Type *RetTy = FTy->getReturnType();
+  Type *RetTy = FTy->getReturnType();
   const Type *NRetTy = NULL;
   unsigned RetCount = NumRetVals(F);
 
   // -1 means unused, other numbers are the new index
   SmallVector<int, 5> NewRetIdxs(RetCount, -1);
-  std::vector<const Type*> RetTypes;
+  std::vector<Type*> RetTypes;
   if (RetTy->isVoidTy()) {
     NRetTy = RetTy;
   } else {
@@ -822,11 +822,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
-                               Args.begin(), Args.end(), "", Call);
+                               Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
       cast<InvokeInst>(New)->setAttributes(NewCallPAL);
     } else {
-      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+      New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
       cast<CallInst>(New)->setAttributes(NewCallPAL);
       if (cast<CallInst>(Call)->isTailCall())
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
deleted file mode 100644
index d3d4963..0000000
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to cleanup the output of GCC.  It eliminate names for types
-// that are unused in the entire translation unit, using the FindUsedTypes pass.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "deadtypeelim"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
-
-namespace {
-  struct DTE : public ModulePass {
-    static char ID; // Pass identification, replacement for typeid
-    DTE() : ModulePass(ID) {
-      initializeDTEPass(*PassRegistry::getPassRegistry());
-    }
-
-    // doPassInitialization - For this pass, it removes global symbol table
-    // entries for primitive types.  These are never used for linking in GCC and
-    // they make the output uglier to look at, so we nuke them.
-    //
-    // Also, initialize instance variables.
-    //
-    bool runOnModule(Module &M);
-
-    // getAnalysisUsage - This function needs FindUsedTypes to do its job...
-    //
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.addRequired<FindUsedTypes>();
-    }
-  };
-}
-
-char DTE::ID = 0;
-INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
-                      false, false)
-INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
-INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
-
-ModulePass *llvm::createDeadTypeEliminationPass() {
-  return new DTE();
-}
-
-
-// ShouldNukeSymtabEntry - Return true if this module level symbol table entry
-// should be eliminated.
-//
-static inline bool ShouldNukeSymtabEntry(const Type *Ty){
-  // Nuke all names for primitive types!
-  if (Ty->isPrimitiveType() || Ty->isIntegerTy()) 
-    return true;
-
-  // Nuke all pointers to primitive types as well...
-  if (const PointerType *PT = dyn_cast<PointerType>(Ty))
-    if (PT->getElementType()->isPrimitiveType() ||
-        PT->getElementType()->isIntegerTy()) 
-      return true;
-
-  return false;
-}
-
-// run - For this pass, it removes global symbol table entries for primitive
-// types.  These are never used for linking in GCC and they make the output
-// uglier to look at, so we nuke them.  Also eliminate types that are never used
-// in the entire program as indicated by FindUsedTypes.
-//
-bool DTE::runOnModule(Module &M) {
-  bool Changed = false;
-
-  TypeSymbolTable &ST = M.getTypeSymbolTable();
-  const SetVector<const Type*> &T = getAnalysis<FindUsedTypes>().getTypes();
-  std::set<const Type*> UsedTypes(T.begin(), T.end());
-
-  // Check the symbol table for superfluous type entries...
-  //
-  // Grab the 'type' plane of the module symbol...
-  TypeSymbolTable::iterator TI = ST.begin();
-  TypeSymbolTable::iterator TE = ST.end();
-  while ( TI != TE ) {
-    // If this entry should be unconditionally removed, or if we detect that
-    // the type is not used, remove it.
-    const Type *RHS = TI->second;
-    if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) {
-      ST.remove(TI++);
-      ++NumKilled;
-      Changed = true;
-    } else {
-      ++TI;
-      // We only need to leave one name for each type.
-      UsedTypes.erase(RHS);
-    }
-  }
-
-  return Changed;
-}
-
-// vim: sw=2
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 21dcb51..31ce95f 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -25,7 +25,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
   initializeConstantMergePass(Registry);
   initializeDAEPass(Registry);
   initializeDAHPass(Registry);
-  initializeDTEPass(Registry);
   initializeFunctionAttrsPass(Registry);
   initializeGlobalDCEPass(Registry);
   initializeGlobalOptPass(Registry);
@@ -63,10 +62,6 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createDeadArgEliminationPass());
 }
 
-void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) {
-  unwrap(PM)->add(createDeadTypeEliminationPass());
-}
-
 void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createFunctionAttrsPass());
 }
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 52ecf17..659476b 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -267,7 +267,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
   CastInst* CI = 
     new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
   Value *Args[] = { CI, Inst->getArgOperand(1) };
-  CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
+  CallInst::Create(ThrowLongJmp, Args, "", Inst);
 
   SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
 
@@ -386,7 +386,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
     GetSetJmpMap(Func), BufPtr,
     ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
   };
-  CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst);
+  CallInst::Create(AddSJToMap, Args, "", Inst);
 
   // We are guaranteed that there are no values live across basic blocks
   // (because we are "not in SSA form" yet), but there can still be values live
@@ -482,7 +482,7 @@ void LowerSetJmp::visitCallInst(CallInst& CI)
   std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
   InvokeInst* II =
     InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
-                       Params.begin(), Params.end(), CI.getName(), Term);
+                       Params, CI.getName(), Term);
   II->setCallingConv(CI.getCallingConv());
   II->setAttributes(CI.getAttributes());
 
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index f741443..7796d05 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -218,7 +218,6 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
     llvm_unreachable("Unknown type!");
     // Fall through in Release mode.
   case Type::IntegerTyID:
-  case Type::OpaqueTyID:
   case Type::VectorTyID:
     // Ty1 == Ty2 would have returned true earlier.
     return false;
@@ -733,7 +732,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
     ++i;
   }
 
-  CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
+  CallInst *CI = Builder.CreateCall(F, Args);
   CI->setTailCall();
   CI->setCallingConv(F->getCallingConv());
   if (NewG->getReturnType()->isVoidTy()) {
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 2f3baeb..b7e63dc 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -175,8 +175,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
       if (II->doesNotThrow()) {
         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
         // Insert a call instruction before the invoke.
-        CallInst *Call = CallInst::Create(II->getCalledValue(),
-                                          Args.begin(), Args.end(), "", II);
+        CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
         Call->takeName(II);
         Call->setCallingConv(II->getCallingConv());
         Call->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index a690765..0fbaff1 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -28,8 +28,8 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ValueSymbolTable.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
@@ -143,8 +143,7 @@ static void RemoveDeadConstant(Constant *C) {
   assert(C->use_empty() && "Constant is not dead!");
   SmallPtrSet<Constant*, 4> Operands;
   for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
-    if (isa<DerivedType>(C->getOperand(i)->getType()) &&
-        OnlyUsedBy(C->getOperand(i), C)) 
+    if (OnlyUsedBy(C->getOperand(i), C)) 
       Operands.insert(cast<Constant>(C->getOperand(i)));
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
     if (!GV->hasLocalLinkage()) return;   // Don't delete non static globals.
@@ -174,13 +173,19 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
   }
 }
 
-// Strip the symbol table of its names.
-static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {
-  for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) {
-    if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg"))
-      ++TI;
-    else
-      ST.remove(TI++);
+// Strip any named types of their names.
+static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
+  std::vector<StructType*> StructTypes;
+  M.findUsedStructTypes(StructTypes);
+
+  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+    StructType *STy = StructTypes[i];
+    if (STy->isAnonymous() || STy->getName().empty()) continue;
+    
+    if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+      continue;
+
+    STy->setName("");
   }
 }
 
@@ -221,7 +226,7 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
   }
   
   // Remove all names from types.
-  StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo);
+  StripTypeNames(M, PreserveDbgInfo);
 
   return true;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a08446e..64ea36f 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1400,7 +1400,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
 /// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
 /// If so, insert the new bswap intrinsic and return it.
 Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
-  const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+  IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
   if (!ITy || ITy->getBitWidth() % 16 || 
       // ByteMask only allows up to 32-byte values.
       ITy->getBitWidth() > 32*8) 
@@ -1424,9 +1424,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
   for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
     if (ByteValues[i] != V)
       return 0;
-  const Type *Tys[] = { ITy };
   Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
   return CallInst::Create(F, V);
 }
 
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 27e15c3..537f2b3 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -217,10 +217,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         if (GVSrc->isConstant()) {
           Module *M = CI.getParent()->getParent()->getParent();
           Intrinsic::ID MemCpyID = Intrinsic::memcpy;
-          const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
-                                 CI.getArgOperand(1)->getType(),
-                                 CI.getArgOperand(2)->getType() };
-          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
+          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+                           CI.getArgOperand(1)->getType(),
+                           CI.getArgOperand(2)->getType() };
+          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
           Changed = true;
         }
     }
@@ -1118,13 +1118,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   Instruction *NC;
   if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
     NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
-                               II->getUnwindDest(), Args.begin(), Args.end());
+                               II->getUnwindDest(), Args);
     NC->takeName(II);
     cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
     cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
   } else {
     CallInst *CI = cast<CallInst>(Caller);
-    NC = Builder->CreateCall(Callee, Args.begin(), Args.end());
+    NC = Builder->CreateCall(Callee, Args);
     NC->takeName(CI);
     if (CI->isTailCall())
       cast<CallInst>(NC)->setTailCall();
@@ -1187,7 +1187,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
   const AttrListPtr &NestAttrs = NestF->getAttributes();
   if (!NestAttrs.isEmpty()) {
     unsigned NestIdx = 1;
-    const Type *NestTy = 0;
+    Type *NestTy = 0;
     Attributes NestAttr = Attribute::None;
 
     // Look for a parameter marked with the 'nest' attribute.
@@ -1249,7 +1249,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       // Handle this by synthesizing a new function type, equal to FTy
       // with the chain parameter inserted.
 
-      std::vector<const Type*> NewTypes;
+      std::vector<Type*> NewTypes;
       NewTypes.reserve(FTy->getNumParams()+1);
 
       // Insert the chain's type into the list of parameter types, which may
@@ -1289,11 +1289,11 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
         NewCaller = InvokeInst::Create(NewCallee,
                                        II->getNormalDest(), II->getUnwindDest(),
-                                       NewArgs.begin(), NewArgs.end());
+                                       NewArgs);
         cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
         cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
       } else {
-        NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end());
+        NewCaller = CallInst::Create(NewCallee, NewArgs);
         if (cast<CallInst>(Caller)->isTailCall())
           cast<CallInst>(NewCaller)->setTailCall();
         cast<CallInst>(NewCaller)->
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 601d9b4..82c734e 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -30,6 +30,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
   }
   
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+    // Cannot look past anything that might overflow.
+    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+    if (OBI && !OBI->hasNoUnsignedWrap()) {
+      Scale = 1;
+      Offset = 0;
+      return Val;
+    }
+
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
       if (I->getOpcode() == Instruction::Shl) {
         // This is a value scaled by '1 << the shift amt'.
@@ -71,11 +79,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   // This requires TargetData to get the alloca alignment and size information.
   if (!TD) return 0;
 
-  // Insist that the amount-to-allocate not overflow.
-  OverflowingBinaryOperator *OBI =
-    dyn_cast<OverflowingBinaryOperator>(AI.getOperand(0));
-  if (OBI && !(OBI->hasNoSignedWrap() || OBI->hasNoUnsignedWrap())) return 0;
-
   const PointerType *PTy = cast<PointerType>(CI.getType());
   
   BuilderTy AllocaBuilder(*Builder);
@@ -1213,7 +1216,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
   CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
   if (Call && Call->getCalledFunction() &&
       Call->getCalledFunction()->getName() == "sqrt" &&
-      Call->getNumArgOperands() == 1) {
+      Call->getNumArgOperands() == 1 &&
+      Call->hasOneUse()) {
     CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
     if (Arg && Arg->getOpcode() == Instruction::FPExt &&
         CI.getType()->isFloatTy() &&
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 42db444..c78760b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -42,13 +42,12 @@ static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
 static bool HasAddOverflow(ConstantInt *Result,
                            ConstantInt *In1, ConstantInt *In2,
                            bool IsSigned) {
-  if (IsSigned)
-    if (In2->getValue().isNegative())
-      return Result->getValue().sgt(In1->getValue());
-    else
-      return Result->getValue().slt(In1->getValue());
-  else
+  if (!IsSigned)
     return Result->getValue().ult(In1->getValue());
+
+  if (In2->isNegative())
+    return Result->getValue().sgt(In1->getValue());
+  return Result->getValue().slt(In1->getValue());
 }
 
 /// AddWithOverflow - Compute Result = In1+In2, returning true if the result
@@ -77,13 +76,13 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1,
 static bool HasSubOverflow(ConstantInt *Result,
                            ConstantInt *In1, ConstantInt *In2,
                            bool IsSigned) {
-  if (IsSigned)
-    if (In2->getValue().isNegative())
-      return Result->getValue().slt(In1->getValue());
-    else
-      return Result->getValue().sgt(In1->getValue());
-  else
+  if (!IsSigned)
     return Result->getValue().ugt(In1->getValue());
+  
+  if (In2->isNegative())
+    return Result->getValue().slt(In1->getValue());
+
+  return Result->getValue().sgt(In1->getValue());
 }
 
 /// SubWithOverflow - Compute Result = In1-In2, returning true if the result
@@ -128,8 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
   case ICmpInst::ICMP_UGT:
     // True if LHS u> RHS and RHS == high-bit-mask - 1
     TrueIfSigned = true;
-    return RHS->getValue() ==
-      APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+    return RHS->isMaxValue(true);
   case ICmpInst::ICMP_UGE: 
     // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
     TrueIfSigned = true;
@@ -278,8 +276,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
     
     // If this is indexing an array of structures, get the structure element.
     if (!LaterIndices.empty())
-      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(),
-                                          LaterIndices.size());
+      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
     
     // If the element is masked, handle it.
     if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
@@ -828,7 +825,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
         LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
       }
     }
-  } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+  } else if (DivRHS->isNegative()) { // Divisor is < 0.
     if (DivI->isExact())
       RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
     if (CmpRHSV == 0) {       // (X / neg) op 0
@@ -1028,7 +1025,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         
         // If the sign bit of the XorCST is not set, there is no change to
         // the operation, just stop using the Xor.
-        if (!XorCST->getValue().isNegative()) {
+        if (!XorCST->isNegative()) {
           ICI.setOperand(0, CompareVal);
           Worklist.Add(LHSI);
           return &ICI;
@@ -1061,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         }
 
         // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
-        if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+        if (!ICI.isEquality() && XorCST->isMaxValue(true)) {
           const APInt &NotSignBit = XorCST->getValue();
           ICmpInst::Predicate Pred = ICI.isSigned()
                                          ? ICI.getUnsignedPredicate()
@@ -1088,7 +1085,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
         // Extending a relational comparison when we're checking the sign
         // bit would not work.
         if (ICI.isEquality() ||
-            (AndCST->getValue().isNonNegative() && RHSV.isNonNegative())) {
+            (!AndCST->isNegative() && RHSV.isNonNegative())) {
           Value *NewAnd =
             Builder->CreateAnd(Cast->getOperand(0),
                                ConstantExpr::getZExt(AndCST, Cast->getSrcTy()));
@@ -1454,7 +1451,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
             return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
                                 ICmpInst::ICMP_NE, LHSI,
                                 Constant::getNullValue(RHS->getType()));
-          
+
+          // Don't perform the following transforms if the AND has multiple uses
+          if (!BO->hasOneUse())
+            break;
+
           // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
           if (BOC->getValue().isSignBit()) {
             Value *X = BO->getOperand(0);
@@ -1679,9 +1680,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
   // result and the overflow bit.
   Module *M = I.getParent()->getParent()->getParent();
   
-  const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+  Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
   Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
-                                       &NewType, 1);
+                                       NewType);
 
   InstCombiner::BuilderTy *Builder = IC.Builder;
   
@@ -1721,8 +1722,8 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
   Builder->SetInsertPoint(OrigAdd);
 
   Module *M = I.getParent()->getParent()->getParent();
-  const Type *Ty = LHS->getType();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1);
+  Type *Ty = LHS->getType();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
   CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
   Value *Add = Builder->CreateExtractValue(Call, 0);
 
@@ -2384,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
                                 BO1->getOperand(0));
           }
           
-          if (CI->getValue().isMaxSignedValue()) {
+          if (CI->isMaxValue(true)) {
             ICmpInst::Predicate Pred = I.isSigned()
                                            ? I.getUnsignedPredicate()
                                            : I.getSignedPredicate();
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2d29403..630a6fe 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -691,14 +691,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
     bool hasNegative = false;
     for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
       if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
-        if (RHS->getValue().isNegative())
+        if (RHS->isNegative())
           hasNegative = true;
 
     if (hasNegative) {
       std::vector<Constant *> Elts(VWidth);
       for (unsigned i = 0; i != VWidth; ++i) {
         if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
-          if (RHS->getValue().isNegative())
+          if (RHS->isNegative())
             Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
           else
             Elts[i] = RHS;
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 92c10f5..ab98ef9 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -785,6 +785,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // getelementptr instructions into a single instruction.
   //
   if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+
+    // If this GEP has only 0 indices, it is the same pointer as
+    // Src. If Src is not a trivial GEP too, don't combine
+    // the indices.
+    if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
+        !Src->hasOneUse())
+      return 0;
+
     // Note that if our source is a gep chain itself that we wait for that
     // chain to be resolved before we perform this transformation.  This
     // avoids us creating a TON of code in some cases.
@@ -1191,7 +1199,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       if (EV.getNumIndices() > 1)
         // Extract the remaining indices out of the constant indexed by the
         // first index
-        return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+        return ExtractValueInst::Create(V, EV.getIndices().slice(1));
       else
         return ReplaceInstUsesWith(EV, V);
     }
@@ -1214,7 +1222,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
         // with
         // %E = extractvalue { i32, { i32 } } %A, 0
         return ExtractValueInst::Create(IV->getAggregateOperand(),
-                                        EV.idx_begin(), EV.idx_end());
+                                        EV.getIndices());
     }
     if (exti == exte && insi == inse)
       // Both iterators are at the end: Index lists are identical. Replace
@@ -1232,9 +1240,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // by switching the order of the insert and extract (though the
       // insertvalue should be left in, since it may have other uses).
       Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
-                                                 EV.idx_begin(), EV.idx_end());
+                                                 EV.getIndices());
       return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
-                                     insi, inse);
+                                     ArrayRef<unsigned>(insi, inse));
     }
     if (insi == inse)
       // The insert list is a prefix of the extract list
@@ -1246,7 +1254,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // with
       // %E extractvalue { i32 } { i32 42 }, 0
       return ExtractValueInst::Create(IV->getInsertedValueOperand(), 
-                                      exti, exte);
+                                      ArrayRef<unsigned>(exti, exte));
   }
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
     // We're extracting from an intrinsic, see if we're the only user, which
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 07d69e8..3f2c412 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -572,14 +572,13 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
 }
 
 Constant *GCOVProfiler::getStartFileFunc() {
-  const Type *Args[] = { Type::getInt8PtrTy(*Ctx) };
   const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
-                                              Args, false);
+                                              Type::getInt8PtrTy(*Ctx), false);
   return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
 }
 
 Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
-  const Type *Args[] = {
+  Type *Args[] = {
     Type::getInt32PtrTy(*Ctx),                  // uint32_t *predecessor
     Type::getInt64PtrTy(*Ctx)->getPointerTo(),  // uint64_t **state_table_row
   };
@@ -589,7 +588,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
 }
 
 Constant *GCOVProfiler::getEmitFunctionFunc() {
-  const Type *Args[2] = {
+  Type *Args[2] = {
     Type::getInt32Ty(*Ctx),    // uint32_t ident
     Type::getInt8PtrTy(*Ctx),  // const char *function_name
   };
@@ -599,7 +598,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() {
 }
 
 Constant *GCOVProfiler::getEmitArcsFunc() {
-  const Type *Args[] = {
+  Type *Args[] = {
     Type::getInt32Ty(*Ctx),     // uint32_t num_counters
     Type::getInt64PtrTy(*Ctx),  // uint64_t *counters
   };
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 1e5e3f6..7541663 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -1062,7 +1062,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue,
 
     CallInst::Create(
       increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
-      args.begin(), args.end(), "", insertPoint);
+      args, "", insertPoint);
   }
 }
 
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 4224ee3..445a5b6 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -62,8 +62,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
   }
   Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
 
-  CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
-                                        "newargc", InsertPos);
+  CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
 
   // If argc or argv are not available in main, just pass null values in.
   Function::arg_iterator AI;
@@ -134,7 +133,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
 void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
   // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
   // types.
-  const Type *GlobalDtorElems[2] = {
+  Type *GlobalDtorElems[2] = {
     Type::getInt32Ty(Mod->getContext()),
     FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
   };
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 759e681..87b7317 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -91,6 +91,7 @@ namespace {
     uint32_t nextValueNumber;
 
     Expression create_expression(Instruction* I);
+    Expression create_extractvalue_expression(ExtractValueInst* EI);
     uint32_t lookup_or_add_call(CallInst* C);
   public:
     ValueTable() : nextValueNumber(1) { }
@@ -141,7 +142,6 @@ template <> struct DenseMapInfo<Expression> {
 //                     ValueTable Internal Functions
 //===----------------------------------------------------------------------===//
 
-
 Expression ValueTable::create_expression(Instruction *I) {
   Expression e;
   e.type = I->getType();
@@ -150,12 +150,8 @@ Expression ValueTable::create_expression(Instruction *I) {
        OI != OE; ++OI)
     e.varargs.push_back(lookup_or_add(*OI));
   
-  if (CmpInst *C = dyn_cast<CmpInst>(I))
+  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
     e.opcode = (C->getOpcode() << 8) | C->getPredicate();
-  else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
-    for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
-         II != IE; ++II)
-      e.varargs.push_back(*II);
   } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
     for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
          II != IE; ++II)
@@ -165,6 +161,58 @@ Expression ValueTable::create_expression(Instruction *I) {
   return e;
 }
 
+Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+  assert(EI != 0 && "Not an ExtractValueInst?");
+  Expression e;
+  e.type = EI->getType();
+  e.opcode = 0;
+
+  IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+  if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+    // EI might be an extract from one of our recognised intrinsics. If it
+    // is we'll synthesize a semantically equivalent expression instead on
+    // an extract value expression.
+    switch (I->getIntrinsicID()) {
+      case Intrinsic::sadd_with_overflow:
+      case Intrinsic::uadd_with_overflow:
+        e.opcode = Instruction::Add;
+        break;
+      case Intrinsic::ssub_with_overflow:
+      case Intrinsic::usub_with_overflow:
+        e.opcode = Instruction::Sub;
+        break;
+      case Intrinsic::smul_with_overflow:
+      case Intrinsic::umul_with_overflow:
+        e.opcode = Instruction::Mul;
+        break;
+      default:
+        break;
+    }
+
+    if (e.opcode != 0) {
+      // Intrinsic recognized. Grab its args to finish building the expression.
+      assert(I->getNumArgOperands() == 2 &&
+             "Expect two args for recognised intrinsics.");
+      e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
+      e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+      return e;
+    }
+  }
+
+  // Not a recognised intrinsic. Fall back to producing an extract value
+  // expression.
+  e.opcode = EI->getOpcode();
+  for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
+       OI != OE; ++OI)
+    e.varargs.push_back(lookup_or_add(*OI));
+
+  for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
+         II != IE; ++II)
+    e.varargs.push_back(*II);
+
+  return e;
+}
+
 //===----------------------------------------------------------------------===//
 //                     ValueTable External Functions
 //===----------------------------------------------------------------------===//
@@ -336,11 +384,13 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
     case Instruction::ExtractElement:
     case Instruction::InsertElement:
     case Instruction::ShuffleVector:
-    case Instruction::ExtractValue:
     case Instruction::InsertValue:
     case Instruction::GetElementPtr:
       exp = create_expression(I);
       break;
+    case Instruction::ExtractValue:
+      exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+      break;
     default:
       valueNumbering[V] = nextValueNumber;
       return nextValueNumber++;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index b7c97e5..dee3d38 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -58,6 +58,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
@@ -72,6 +73,7 @@ STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
 STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
 STATISTIC(NumElimRem     , "Number of IV remainder operations eliminated");
 STATISTIC(NumElimCmp     , "Number of IV comparisons eliminated");
+STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");
 
 static cl::opt<bool> DisableIVRewrite(
   "disable-iv-rewrite", cl::Hidden,
@@ -114,8 +116,17 @@ namespace {
     }
 
   private:
+    virtual void releaseMemory() {
+      DeadInsts.clear();
+    }
+
     bool isValidRewrite(Value *FromVal, Value *ToVal);
 
+    void HandleFloatingPointIV(Loop *L, PHINode *PH);
+    void RewriteNonIntegerIVs(Loop *L);
+
+    void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
     void SimplifyIVUsers(SCEVExpander &Rewriter);
     void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
 
@@ -124,20 +135,16 @@ namespace {
     void EliminateIVRemainder(BinaryOperator *Rem,
                               Value *IVOperand,
                               bool IsSigned);
-    bool isSimpleIVUser(Instruction *I, const Loop *L);
-    void RewriteNonIntegerIVs(Loop *L);
+
+    void SimplifyCongruentIVs(Loop *L);
+
+    void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
 
     ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
                                         PHINode *IndVar,
                                         SCEVExpander &Rewriter);
 
-    void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
-    void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
     void SinkUnusedInvariants(Loop *L);
-
-    void HandleFloatingPointIV(Loop *L, PHINode *PH);
   };
 }
 
@@ -204,156 +211,262 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
   return true;
 }
 
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
-  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
-  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
-      BackedgeTakenCount->isZero())
-    return false;
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
 
-  if (!L->getExitingBlock())
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+  bool isExact = false;
+  if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
     return false;
-
-  // Can't rewrite non-branch yet.
-  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
-  if (!BI)
+  // See if we can convert this to an int64_t
+  uint64_t UIntVal;
+  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+                           &isExact) != APFloat::opOK || !isExact)
     return false;
-
-  // Special case: If the backedge-taken count is a UDiv, it's very likely a
-  // UDiv that ScalarEvolution produced in order to compute a precise
-  // expression, rather than a UDiv from the user's code. If we can't find a
-  // UDiv in the code with some simple searching, assume the former and forego
-  // rewriting the loop.
-  if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
-    ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
-    if (!OrigCond) return false;
-    const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
-    R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
-    if (R != BackedgeTakenCount) {
-      const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
-      L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
-      if (L != BackedgeTakenCount)
-        return false;
-    }
-  }
+  IntVal = UIntVal;
   return true;
 }
 
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
+/// HandleFloatingPointIV - If the loop has floating induction variable
+/// then insert corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+///   bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+///   bar((double)i);
 ///
-/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
-static const Type *getBackedgeIVType(Loop *L) {
-  if (!L->getExitingBlock())
-    return 0;
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+  unsigned BackEdge     = IncomingEdge^1;
 
-  // Can't rewrite non-branch yet.
-  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
-  if (!BI)
-    return 0;
+  // Check incoming value.
+  ConstantFP *InitValueVal =
+    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
 
-  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
-  if (!Cond)
-    return 0;
+  int64_t InitValue;
+  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+    return;
 
-  const Type *Ty = 0;
-  for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
-      OI != OE; ++OI) {
-    assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
-    TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
-    if (!Trunc)
-      continue;
+  // Check IV increment. Reject this PN if increment operation is not
+  // an add or increment value can not be represented by an integer.
+  BinaryOperator *Incr =
+    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
 
-    return Trunc->getSrcTy();
+  // If this is not an add of the PHI with a constantfp, or if the constant fp
+  // is not an integer, bail out.
+  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+  int64_t IncValue;
+  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+    return;
+
+  // Check Incr uses. One user is PN and the other user is an exit condition
+  // used by the conditional terminator.
+  Value::use_iterator IncrUse = Incr->use_begin();
+  Instruction *U1 = cast<Instruction>(*IncrUse++);
+  if (IncrUse == Incr->use_end()) return;
+  Instruction *U2 = cast<Instruction>(*IncrUse++);
+  if (IncrUse != Incr->use_end()) return;
+
+  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
+  // only used by a branch, we can't transform it.
+  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+  if (!Compare)
+    Compare = dyn_cast<FCmpInst>(U2);
+  if (Compare == 0 || !Compare->hasOneUse() ||
+      !isa<BranchInst>(Compare->use_back()))
+    return;
+
+  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+  // We need to verify that the branch actually controls the iteration count
+  // of the loop.  If not, the new IV can overflow and no one will notice.
+  // The branch block must be in the loop and one of the successors must be out
+  // of the loop.
+  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+  if (!L->contains(TheBr->getParent()) ||
+      (L->contains(TheBr->getSuccessor(0)) &&
+       L->contains(TheBr->getSuccessor(1))))
+    return;
+
+
+  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+  // transform it.
+  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+  int64_t ExitValue;
+  if (ExitValueVal == 0 ||
+      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+    return;
+
+  // Find new predicate for integer comparison.
+  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+  switch (Compare->getPredicate()) {
+  default: return;  // Unknown comparison.
+  case CmpInst::FCMP_OEQ:
+  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+  case CmpInst::FCMP_ONE:
+  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+  case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+  case CmpInst::FCMP_OGE:
+  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+  case CmpInst::FCMP_OLT:
+  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
   }
-  return Ty;
-}
 
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable.  This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
-LinearFunctionTestReplace(Loop *L,
-                          const SCEV *BackedgeTakenCount,
-                          PHINode *IndVar,
-                          SCEVExpander &Rewriter) {
-  assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
-  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  // We convert the floating point induction variable to a signed i32 value if
+  // we can.  This is only safe if the comparison will not overflow in a way
+  // that won't be trapped by the integer equivalent operations.  Check for this
+  // now.
+  // TODO: We could use i64 if it is native and the range requires it.
 
-  // If the exiting block is not the same as the backedge block, we must compare
-  // against the preincremented value, otherwise we prefer to compare against
-  // the post-incremented value.
-  Value *CmpIndVar;
-  const SCEV *RHS = BackedgeTakenCount;
-  if (L->getExitingBlock() == L->getLoopLatch()) {
-    // Add one to the "backedge-taken" count to get the trip count.
-    // If this addition may overflow, we have to be more pessimistic and
-    // cast the induction variable before doing the add.
-    const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
-    const SCEV *N =
-      SE->getAddExpr(BackedgeTakenCount,
-                     SE->getConstant(BackedgeTakenCount->getType(), 1));
-    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
-        SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
-      // No overflow. Cast the sum.
-      RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
-    } else {
-      // Potential overflow. Cast before doing the add.
-      RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
-                                        IndVar->getType());
-      RHS = SE->getAddExpr(RHS,
-                           SE->getConstant(IndVar->getType(), 1));
+  // The start/stride/exit values must all fit in signed i32.
+  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+    return;
+
+  // If not actually striding (add x, 0.0), avoid touching the code.
+  if (IncValue == 0)
+    return;
+
+  // Positive and negative strides have different safety conditions.
+  if (IncValue > 0) {
+    // If we have a positive stride, we require the init to be less than the
+    // exit value and an equality or less than comparison.
+    if (InitValue >= ExitValue ||
+        NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+      return;
+
+    uint32_t Range = uint32_t(ExitValue-InitValue);
+    if (NewPred == CmpInst::ICMP_SLE) {
+      // Normalize SLE -> SLT, check for infinite loop.
+      if (++Range == 0) return;  // Range overflows.
     }
 
-    // The BackedgeTaken expression contains the number of times that the
-    // backedge branches to the loop header.  This is one less than the
-    // number of times the loop executes, so use the incremented indvar.
-    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+    unsigned Leftover = Range % uint32_t(IncValue);
+
+    // If this is an equality comparison, we require that the strided value
+    // exactly land on the exit value, otherwise the IV condition will wrap
+    // around and do things the fp IV wouldn't.
+    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+        Leftover != 0)
+      return;
+
+    // If the stride would wrap around the i32 before exiting, we can't
+    // transform the IV.
+    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+      return;
+
   } else {
-    // We have to use the preincremented value...
-    RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
-                                      IndVar->getType());
-    CmpIndVar = IndVar;
+    // If we have a negative stride, we require the init to be greater than the
+    // exit value and an equality or greater than comparison.
+    if (InitValue >= ExitValue ||
+        NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+      return;
+
+    uint32_t Range = uint32_t(InitValue-ExitValue);
+    if (NewPred == CmpInst::ICMP_SGE) {
+      // Normalize SGE -> SGT, check for infinite loop.
+      if (++Range == 0) return;  // Range overflows.
+    }
+
+    unsigned Leftover = Range % uint32_t(-IncValue);
+
+    // If this is an equality comparison, we require that the strided value
+    // exactly land on the exit value, otherwise the IV condition will wrap
+    // around and do things the fp IV wouldn't.
+    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+        Leftover != 0)
+      return;
+
+    // If the stride would wrap around the i32 before exiting, we can't
+    // transform the IV.
+    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+      return;
   }
 
-  // Expand the code for the iteration count.
-  assert(SE->isLoopInvariant(RHS, L) &&
-         "Computed iteration count is not loop invariant!");
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+  const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
 
-  // Insert a new icmp_ne or icmp_eq instruction before the branch.
-  ICmpInst::Predicate Opcode;
-  if (L->contains(BI->getSuccessor(0)))
-    Opcode = ICmpInst::ICMP_NE;
-  else
-    Opcode = ICmpInst::ICMP_EQ;
+  // Insert new integer induction variable.
+  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+                      PN->getIncomingBlock(IncomingEdge));
 
-  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
-               << "      LHS:" << *CmpIndVar << '\n'
-               << "       op:\t"
-               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
-               << "      RHS:\t" << *RHS << "\n");
+  Value *NewAdd =
+    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+                              Incr->getName()+".int", Incr);
+  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
 
-  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+                                      ConstantInt::get(Int32Ty, ExitValue),
+                                      Compare->getName());
 
-  Value *OrigCond = BI->getCondition();
-  // It's tempting to use replaceAllUsesWith here to fully replace the old
-  // comparison, but that's not immediately safe, since users of the old
-  // comparison may not be dominated by the new comparison. Instead, just
-  // update the branch to use the new comparison; in the common case this
-  // will make old comparison dead.
-  BI->setCondition(Cond);
-  DeadInsts.push_back(OrigCond);
+  // In the following deletions, PN may become dead and may be deleted.
+  // Use a WeakVH to observe whether this happens.
+  WeakVH WeakPH = PN;
 
-  ++NumLFTR;
-  Changed = true;
-  return Cond;
+  // Delete the old floating point exit comparison.  The branch starts using the
+  // new comparison.
+  NewCompare->takeName(Compare);
+  Compare->replaceAllUsesWith(NewCompare);
+  RecursivelyDeleteTriviallyDeadInstructions(Compare);
+
+  // Delete the old floating point increment.
+  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+  RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+  // If the FP induction variable still has uses, this is because something else
+  // in the loop uses its value.  In order to canonicalize the induction
+  // variable, we chose to eliminate the IV and rewrite it in terms of an
+  // int->fp cast.
+  //
+  // We give preference to sitofp over uitofp because it is faster on most
+  // platforms.
+  if (WeakPH) {
+    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+                                 PN->getParent()->getFirstNonPHI());
+    PN->replaceAllUsesWith(Conv);
+    RecursivelyDeleteTriviallyDeadInstructions(PN);
+  }
+
+  // Add a new IVUsers entry for the newly-created integer PHI.
+  if (IU)
+    IU->AddUsersIfInteresting(NewPHI);
 }
 
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+  // First step.  Check to see if there are any floating-point recurrences.
+  // If there are, change them into integer recurrences, permitting analysis by
+  // the SCEV routines.
+  //
+  BasicBlock *Header = L->getHeader();
+
+  SmallVector<WeakVH, 8> PHIs;
+  for (BasicBlock::iterator I = Header->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    PHIs.push_back(PN);
+
+  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+      HandleFloatingPointIV(L, PN);
+
+  // If the loop previously had floating-point IV, ScalarEvolution
+  // may not have been able to compute a trip count. Now that we've done some
+  // re-writing, the trip count may be computable.
+  if (Changed)
+    SE->forgetLoop(L);
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
 /// RewriteLoopExitValues - Check to see if this loop has a computable
 /// loop-invariant execution count.  If so, this means that we can compute the
 /// final value of any expressions that are recurrent in the loop, and
@@ -467,28 +580,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
   Rewriter.clearInsertPoint();
 }
 
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
-  // First step.  Check to see if there are any floating-point recurrences.
-  // If there are, change them into integer recurrences, permitting analysis by
-  // the SCEV routines.
-  //
-  BasicBlock *Header = L->getHeader();
-
-  SmallVector<WeakVH, 8> PHIs;
-  for (BasicBlock::iterator I = Header->begin();
-       PHINode *PN = dyn_cast<PHINode>(I); ++I)
-    PHIs.push_back(PN);
-
-  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
-    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
-      HandleFloatingPointIV(L, PN);
-
-  // If the loop previously had floating-point IV, ScalarEvolution
-  // may not have been able to compute a trip count. Now that we've done some
-  // re-writing, the trip count may be computable.
-  if (Changed)
-    SE->forgetLoop(L);
-}
+//===----------------------------------------------------------------------===//
+//  Rewrite IV users based on a canonical IV.
+//  To be replaced by -disable-iv-rewrite.
+//===----------------------------------------------------------------------===//
 
 /// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
 /// loop. IVUsers is treated as a worklist. Each successive simplification may
@@ -520,6 +615,133 @@ void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
   }
 }
 
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables.  Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials.  For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
+  // Loop-invariant values are safe.
+  if (SE->isLoopInvariant(S, L)) return true;
+
+  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+  // to transform them into efficient code.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+    return AR->isAffine();
+
+  // An add is safe it all its operands are safe.
+  if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+         E = Commutative->op_end(); I != E; ++I)
+      if (!isSafe(*I, L, SE)) return false;
+    return true;
+  }
+
+  // A cast is safe if its operand is.
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+    return isSafe(C->getOperand(), L, SE);
+
+  // A udiv is safe if its operands are.
+  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+    return isSafe(UD->getLHS(), L, SE) &&
+           isSafe(UD->getRHS(), L, SE);
+
+  // SCEVUnknown is always safe.
+  if (isa<SCEVUnknown>(S))
+    return true;
+
+  // Nothing else is safe.
+  return false;
+}
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+  // Rewrite all induction variable expressions in terms of the canonical
+  // induction variable.
+  //
+  // If there were induction variables of other sizes or offsets, manually
+  // add the offsets to the primary induction variable and cast, avoiding
+  // the need for the code evaluation methods to insert induction variables
+  // of different sizes.
+  for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+    Value *Op = UI->getOperandValToReplace();
+    const Type *UseTy = Op->getType();
+    Instruction *User = UI->getUser();
+
+    // Compute the final addrec to expand into code.
+    const SCEV *AR = IU->getReplacementExpr(*UI);
+
+    // Evaluate the expression out of the loop, if possible.
+    if (!L->contains(UI->getUser())) {
+      const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+      if (SE->isLoopInvariant(ExitVal, L))
+        AR = ExitVal;
+    }
+
+    // FIXME: It is an extremely bad idea to indvar substitute anything more
+    // complex than affine induction variables.  Doing so will put expensive
+    // polynomial evaluations inside of the loop, and the str reduction pass
+    // currently can only reduce affine polynomials.  For now just disable
+    // indvar subst on anything more complex than an affine addrec, unless
+    // it can be expanded to a trivial value.
+    if (!isSafe(AR, L, SE))
+      continue;
+
+    // Determine the insertion point for this user. By default, insert
+    // immediately before the user. The SCEVExpander class will automatically
+    // hoist loop invariants out of the loop. For PHI nodes, there may be
+    // multiple uses, so compute the nearest common dominator for the
+    // incoming blocks.
+    Instruction *InsertPt = User;
+    if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+      for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+        if (PHI->getIncomingValue(i) == Op) {
+          if (InsertPt == User)
+            InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+          else
+            InsertPt =
+              DT->findNearestCommonDominator(InsertPt->getParent(),
+                                             PHI->getIncomingBlock(i))
+                    ->getTerminator();
+        }
+
+    // Now expand it into actual Instructions and patch it into place.
+    Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+                 << "   into = " << *NewVal << "\n");
+
+    if (!isValidRewrite(Op, NewVal)) {
+      DeadInsts.push_back(NewVal);
+      continue;
+    }
+    // Inform ScalarEvolution that this value is changing. The change doesn't
+    // affect its value, but it does potentially affect which use lists the
+    // value will be on after the replacement, which affects ScalarEvolution's
+    // ability to walk use lists and drop dangling pointers when a value is
+    // deleted.
+    SE->forgetValue(User);
+
+    // Patch the new value into place.
+    if (Op->hasName())
+      NewVal->takeName(Op);
+    if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
+      NewValI->setDebugLoc(User->getDebugLoc());
+    User->replaceUsesOfWith(Op, NewVal);
+    UI->setOperandValToReplace(NewVal);
+
+    ++NumRemoved;
+    Changed = true;
+
+    // The old value may be dead now.
+    DeadInsts.push_back(Op);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  IV Widening - Extend the width of an IV to cover its widest uses.
+//===----------------------------------------------------------------------===//
+
 namespace {
   // Collect information about induction variables that are used by sign/zero
   // extend operations. This information is recorded by CollectExtend and
@@ -608,6 +830,8 @@ protected:
                            Instruction *NarrowDef,
                            Instruction *WideDef);
 
+  const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
+
   Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
                           Instruction *WideDef);
 
@@ -704,6 +928,33 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
   return true;
 }
 
+// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
+// perspective after widening it's type? In other words, can the extend be
+// safely hoisted out of the loop with SCEV reducing the value to a recurrence
+// on the same loop. If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+  if (!SE->isSCEVable(NarrowUse->getType()))
+    return 0;
+
+  const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+  if (SE->getTypeSizeInBits(NarrowExpr->getType())
+      >= SE->getTypeSizeInBits(WideType)) {
+    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+    // index. So don't follow this use.
+    return 0;
+  }
+
+  const SCEV *WideExpr = IsSigned ?
+    SE->getSignExtendExpr(NarrowExpr, WideType) :
+    SE->getZeroExtendExpr(NarrowExpr, WideType);
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+  if (!AddRec || AddRec->getLoop() != L)
+    return 0;
+
+  return AddRec;
+}
+
 /// WidenIVUse - Determine whether an individual user of the narrow IV can be
 /// widened. If so, return the wide clone of the user.
 Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
@@ -753,24 +1004,7 @@ Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
   }
 
   // Does this user itself evaluate to a recurrence after widening?
-  const SCEVAddRecExpr *WideAddRec = 0;
-  if (SE->isSCEVable(NarrowUse->getType())) {
-    const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
-    if (SE->getTypeSizeInBits(NarrowExpr->getType())
-        >= SE->getTypeSizeInBits(WideType)) {
-      // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
-      // index. We have already extended the operand, so we're done.
-      return 0;
-    }
-    const SCEV *WideExpr = IsSigned ?
-      SE->getSignExtendExpr(NarrowExpr, WideType) :
-      SE->getZeroExtendExpr(NarrowExpr, WideType);
-
-    // Only widen past values that evaluate to a recurrence in the same loop.
-    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
-    if (AddRec && AddRec->getLoop() == L)
-      WideAddRec = AddRec;
-  }
+  const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
   if (!WideAddRec) {
     // This user does not evaluate to a recurence after widening, so don't
     // follow it. Instead insert a Trunc to kill off the original use,
@@ -911,6 +1145,10 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
   return WidePhi;
 }
 
+//===----------------------------------------------------------------------===//
+//  Simplification of IV users based on SCEV evaluation.
+//===----------------------------------------------------------------------===//
+
 void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
   unsigned IVOperIdx = 0;
   ICmpInst::Predicate Pred = ICmp->getPredicate();
@@ -1057,7 +1295,7 @@ static void pushIVUsers(
 /// This is similar to IVUsers' isInsteresting() but processes each instruction
 /// non-recursively when the operand is already known to be a simpleIVUser.
 ///
-bool IndVarSimplify::isSimpleIVUser(Instruction *I, const Loop *L) {
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
   if (!SE->isSCEVable(I->getType()))
     return false;
 
@@ -1117,7 +1355,7 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
       // Instructions processed by SimplifyIVUsers for CurrIV.
       SmallPtrSet<Instruction*,16> Simplified;
 
-      // Use-def pairs if IVUsers waiting to be processed for CurrIV.
+      // Use-def pairs if IV users waiting to be processed for CurrIV.
       SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
 
       // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
@@ -1142,7 +1380,7 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
           }
           continue;
         }
-        if (isSimpleIVUser(UseInst, L)) {
+        if (isSimpleIVUser(UseInst, L, SE)) {
           pushIVUsers(UseInst, Simplified, SimpleIVUsers);
         }
       }
@@ -1163,6 +1401,292 @@ void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
   }
 }
 
+/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
+/// populate ExprToIVMap for use later.
+///
+void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
+  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+    PHINode *Phi = cast<PHINode>(I);
+    if (!SE->isSCEVable(Phi->getType()))
+      continue;
+
+    const SCEV *S = SE->getSCEV(Phi);
+    DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
+    bool Inserted;
+    tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
+    if (Inserted)
+      continue;
+    PHINode *OrigPhi = Pos->second;
+    // Replacing the congruent phi is sufficient because acyclic redundancy
+    // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+    // that a phi is congruent, it's almost certain to be the head of an IV
+    // user cycle that is isomorphic with the original phi. So it's worth
+    // eagerly cleaning up the common case of a single IV increment.
+    if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+      Instruction *OrigInc =
+        cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+      Instruction *IsomorphicInc =
+        cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+      if (OrigInc != IsomorphicInc &&
+          SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
+          HoistStep(OrigInc, IsomorphicInc, DT)) {
+        DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+              << *IsomorphicInc << '\n');
+        IsomorphicInc->replaceAllUsesWith(OrigInc);
+        DeadInsts.push_back(IsomorphicInc);
+      }
+    }
+    DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+    ++NumElimIV;
+    Phi->replaceAllUsesWith(OrigPhi);
+    DeadInsts.push_back(Phi);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
+
+/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
+/// count expression can be safely and cheaply expanded into an instruction
+/// sequence that can be used by LinearFunctionTestReplace.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+      BackedgeTakenCount->isZero())
+    return false;
+
+  if (!L->getExitingBlock())
+    return false;
+
+  // Can't rewrite non-branch yet.
+  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  if (!BI)
+    return false;
+
+  // Special case: If the backedge-taken count is a UDiv, it's very likely a
+  // UDiv that ScalarEvolution produced in order to compute a precise
+  // expression, rather than a UDiv from the user's code. If we can't find a
+  // UDiv in the code with some simple searching, assume the former and forego
+  // rewriting the loop.
+  if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
+    ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+    if (!OrigCond) return false;
+    const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+    R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+    if (R != BackedgeTakenCount) {
+      const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+      L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+      if (L != BackedgeTakenCount)
+        return false;
+    }
+  }
+  return true;
+}
+
+/// getBackedgeIVType - Get the widest type used by the loop test after peeking
+/// through Truncs.
+///
+/// TODO: Unnecessary if LFTR does not force a canonical IV.
+static const Type *getBackedgeIVType(Loop *L) {
+  if (!L->getExitingBlock())
+    return 0;
+
+  // Can't rewrite non-branch yet.
+  BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+  if (!BI)
+    return 0;
+
+  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!Cond)
+    return 0;
+
+  const Type *Ty = 0;
+  for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+      OI != OE; ++OI) {
+    assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
+    TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
+    if (!Trunc)
+      continue;
+
+    return Trunc->getSrcTy();
+  }
+  return Ty;
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable.  This pass is able to rewrite the exit tests of any loop where the
+/// SCEV analysis can determine a loop-invariant trip count of the loop, which
+/// is actually a much broader range than just linear tests.
+ICmpInst *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+                          const SCEV *BackedgeTakenCount,
+                          PHINode *IndVar,
+                          SCEVExpander &Rewriter) {
+  assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+  BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+
+  // If the exiting block is not the same as the backedge block, we must compare
+  // against the preincremented value, otherwise we prefer to compare against
+  // the post-incremented value.
+  Value *CmpIndVar;
+  const SCEV *RHS = BackedgeTakenCount;
+  if (L->getExitingBlock() == L->getLoopLatch()) {
+    // Add one to the "backedge-taken" count to get the trip count.
+    // If this addition may overflow, we have to be more pessimistic and
+    // cast the induction variable before doing the add.
+    const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
+    const SCEV *N =
+      SE->getAddExpr(BackedgeTakenCount,
+                     SE->getConstant(BackedgeTakenCount->getType(), 1));
+    if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+        SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+      // No overflow. Cast the sum.
+      RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+    } else {
+      // Potential overflow. Cast before doing the add.
+      RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+                                        IndVar->getType());
+      RHS = SE->getAddExpr(RHS,
+                           SE->getConstant(IndVar->getType(), 1));
+    }
+
+    // The BackedgeTaken expression contains the number of times that the
+    // backedge branches to the loop header.  This is one less than the
+    // number of times the loop executes, so use the incremented indvar.
+    CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+  } else {
+    // We have to use the preincremented value...
+    RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+                                      IndVar->getType());
+    CmpIndVar = IndVar;
+  }
+
+  // Expand the code for the iteration count.
+  assert(SE->isLoopInvariant(RHS, L) &&
+         "Computed iteration count is not loop invariant!");
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+
+  // Insert a new icmp_ne or icmp_eq instruction before the branch.
+  ICmpInst::Predicate Opcode;
+  if (L->contains(BI->getSuccessor(0)))
+    Opcode = ICmpInst::ICMP_NE;
+  else
+    Opcode = ICmpInst::ICMP_EQ;
+
+  DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+               << "      LHS:" << *CmpIndVar << '\n'
+               << "       op:\t"
+               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+               << "      RHS:\t" << *RHS << "\n");
+
+  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+  Cond->setDebugLoc(BI->getDebugLoc());
+  Value *OrigCond = BI->getCondition();
+  // It's tempting to use replaceAllUsesWith here to fully replace the old
+  // comparison, but that's not immediately safe, since users of the old
+  // comparison may not be dominated by the new comparison. Instead, just
+  // update the branch to use the new comparison; in the common case this
+  // will make old comparison dead.
+  BI->setCondition(Cond);
+  DeadInsts.push_back(OrigCond);
+
+  ++NumLFTR;
+  Changed = true;
+  return Cond;
+}
+
+//===----------------------------------------------------------------------===//
+//  SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+//===----------------------------------------------------------------------===//
+
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+  BasicBlock *ExitBlock = L->getExitBlock();
+  if (!ExitBlock) return;
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) return;
+
+  Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+  BasicBlock::iterator I = Preheader->getTerminator();
+  while (I != Preheader->begin()) {
+    --I;
+    // New instructions were inserted at the end of the preheader.
+    if (isa<PHINode>(I))
+      break;
+
+    // Don't move instructions which might have side effects, since the side
+    // effects need to complete before instructions inside the loop.  Also don't
+    // move instructions which might read memory, since the loop may modify
+    // memory. Note that it's okay if the instruction might have undefined
+    // behavior: LoopSimplify guarantees that the preheader dominates the exit
+    // block.
+    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+      continue;
+
+    // Skip debug info intrinsics.
+    if (isa<DbgInfoIntrinsic>(I))
+      continue;
+
+    // Don't sink static AllocaInsts out of the entry block, which would
+    // turn them into dynamic allocas!
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (AI->isStaticAlloca())
+        continue;
+
+    // Determine if there is a use in or before the loop (direct or
+    // otherwise).
+    bool UsedInLoop = false;
+    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+         UI != UE; ++UI) {
+      User *U = *UI;
+      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+      if (PHINode *P = dyn_cast<PHINode>(U)) {
+        unsigned i =
+          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+        UseBB = P->getIncomingBlock(i);
+      }
+      if (UseBB == Preheader || L->contains(UseBB)) {
+        UsedInLoop = true;
+        break;
+      }
+    }
+
+    // If there is, the def must remain in the preheader.
+    if (UsedInLoop)
+      continue;
+
+    // Otherwise, sink it to the exit block.
+    Instruction *ToMove = I;
+    bool Done = false;
+
+    if (I != Preheader->begin()) {
+      // Skip debug info intrinsics.
+      do {
+        --I;
+      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+        Done = true;
+    } else {
+      Done = true;
+    }
+
+    ToMove->moveBefore(InsertPt);
+    if (Done) break;
+    InsertPt = ToMove;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  IndVarSimplify driver. Manage several subpasses of IV simplification.
+//===----------------------------------------------------------------------===//
+
 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // If LoopSimplify form is not available, stay out of trouble. Some notes:
   //  - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -1218,6 +1742,10 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   if (!DisableIVRewrite)
     SimplifyIVUsers(Rewriter);
 
+  // Eliminate redundant IV cycles.
+  if (DisableIVRewrite)
+    SimplifyCongruentIVs(L);
+
   // Compute the type of the largest recurrence expression, and decide whether
   // a canonical induction variable should be inserted.
   const Type *LargestType = 0;
@@ -1295,8 +1823,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
            "canonical IV disrupted BackedgeTaken expansion");
     assert(NeedCannIV &&
            "LinearFunctionTestReplace requires a canonical induction variable");
-    NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
-                                        Rewriter);
+    // Check preconditions for proper SCEVExpander operation. SCEV does not
+    // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+    // pass that uses the SCEVExpander must do it. This does not work well for
+    // loop passes because SCEVExpander makes assumptions about all loops, while
+    // LoopPassManager only forces the current loop to be simplified.
+    //
+    // FIXME: SCEV expansion has no way to bail out, so the caller must
+    // explicitly check any assumptions made by SCEV. Brittle.
+    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+    if (!AR || AR->getLoop()->getLoopPreheader())
+      NewICmp =
+        LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
   }
   // Rewrite IV-derived expressions.
   if (!DisableIVRewrite)
@@ -1331,431 +1869,3 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
   return Changed;
 }
-
-// FIXME: It is an extremely bad idea to indvar substitute anything more
-// complex than affine induction variables.  Doing so will put expensive
-// polynomial evaluations inside of the loop, and the str reduction pass
-// currently can only reduce affine polynomials.  For now just disable
-// indvar subst on anything more complex than an affine addrec, unless
-// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
-  // Loop-invariant values are safe.
-  if (SE->isLoopInvariant(S, L)) return true;
-
-  // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
-  // to transform them into efficient code.
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
-    return AR->isAffine();
-
-  // An add is safe it all its operands are safe.
-  if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
-    for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
-         E = Commutative->op_end(); I != E; ++I)
-      if (!isSafe(*I, L, SE)) return false;
-    return true;
-  }
-
-  // A cast is safe if its operand is.
-  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
-    return isSafe(C->getOperand(), L, SE);
-
-  // A udiv is safe if its operands are.
-  if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
-    return isSafe(UD->getLHS(), L, SE) &&
-           isSafe(UD->getRHS(), L, SE);
-
-  // SCEVUnknown is always safe.
-  if (isa<SCEVUnknown>(S))
-    return true;
-
-  // Nothing else is safe.
-  return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
-  // Rewrite all induction variable expressions in terms of the canonical
-  // induction variable.
-  //
-  // If there were induction variables of other sizes or offsets, manually
-  // add the offsets to the primary induction variable and cast, avoiding
-  // the need for the code evaluation methods to insert induction variables
-  // of different sizes.
-  for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
-    Value *Op = UI->getOperandValToReplace();
-    const Type *UseTy = Op->getType();
-    Instruction *User = UI->getUser();
-
-    // Compute the final addrec to expand into code.
-    const SCEV *AR = IU->getReplacementExpr(*UI);
-
-    // Evaluate the expression out of the loop, if possible.
-    if (!L->contains(UI->getUser())) {
-      const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
-      if (SE->isLoopInvariant(ExitVal, L))
-        AR = ExitVal;
-    }
-
-    // FIXME: It is an extremely bad idea to indvar substitute anything more
-    // complex than affine induction variables.  Doing so will put expensive
-    // polynomial evaluations inside of the loop, and the str reduction pass
-    // currently can only reduce affine polynomials.  For now just disable
-    // indvar subst on anything more complex than an affine addrec, unless
-    // it can be expanded to a trivial value.
-    if (!isSafe(AR, L, SE))
-      continue;
-
-    // Determine the insertion point for this user. By default, insert
-    // immediately before the user. The SCEVExpander class will automatically
-    // hoist loop invariants out of the loop. For PHI nodes, there may be
-    // multiple uses, so compute the nearest common dominator for the
-    // incoming blocks.
-    Instruction *InsertPt = User;
-    if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
-      for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
-        if (PHI->getIncomingValue(i) == Op) {
-          if (InsertPt == User)
-            InsertPt = PHI->getIncomingBlock(i)->getTerminator();
-          else
-            InsertPt =
-              DT->findNearestCommonDominator(InsertPt->getParent(),
-                                             PHI->getIncomingBlock(i))
-                    ->getTerminator();
-        }
-
-    // Now expand it into actual Instructions and patch it into place.
-    Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
-    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
-                 << "   into = " << *NewVal << "\n");
-
-    if (!isValidRewrite(Op, NewVal)) {
-      DeadInsts.push_back(NewVal);
-      continue;
-    }
-    // Inform ScalarEvolution that this value is changing. The change doesn't
-    // affect its value, but it does potentially affect which use lists the
-    // value will be on after the replacement, which affects ScalarEvolution's
-    // ability to walk use lists and drop dangling pointers when a value is
-    // deleted.
-    SE->forgetValue(User);
-
-    // Patch the new value into place.
-    if (Op->hasName())
-      NewVal->takeName(Op);
-    if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
-      NewValI->setDebugLoc(User->getDebugLoc());
-    User->replaceUsesOfWith(Op, NewVal);
-    UI->setOperandValToReplace(NewVal);
-
-    ++NumRemoved;
-    Changed = true;
-
-    // The old value may be dead now.
-    DeadInsts.push_back(Op);
-  }
-}
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
-  BasicBlock *ExitBlock = L->getExitBlock();
-  if (!ExitBlock) return;
-
-  BasicBlock *Preheader = L->getLoopPreheader();
-  if (!Preheader) return;
-
-  Instruction *InsertPt = ExitBlock->getFirstNonPHI();
-  BasicBlock::iterator I = Preheader->getTerminator();
-  while (I != Preheader->begin()) {
-    --I;
-    // New instructions were inserted at the end of the preheader.
-    if (isa<PHINode>(I))
-      break;
-
-    // Don't move instructions which might have side effects, since the side
-    // effects need to complete before instructions inside the loop.  Also don't
-    // move instructions which might read memory, since the loop may modify
-    // memory. Note that it's okay if the instruction might have undefined
-    // behavior: LoopSimplify guarantees that the preheader dominates the exit
-    // block.
-    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
-      continue;
-
-    // Skip debug info intrinsics.
-    if (isa<DbgInfoIntrinsic>(I))
-      continue;
-
-    // Don't sink static AllocaInsts out of the entry block, which would
-    // turn them into dynamic allocas!
-    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
-      if (AI->isStaticAlloca())
-        continue;
-
-    // Determine if there is a use in or before the loop (direct or
-    // otherwise).
-    bool UsedInLoop = false;
-    for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-         UI != UE; ++UI) {
-      User *U = *UI;
-      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
-      if (PHINode *P = dyn_cast<PHINode>(U)) {
-        unsigned i =
-          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
-        UseBB = P->getIncomingBlock(i);
-      }
-      if (UseBB == Preheader || L->contains(UseBB)) {
-        UsedInLoop = true;
-        break;
-      }
-    }
-
-    // If there is, the def must remain in the preheader.
-    if (UsedInLoop)
-      continue;
-
-    // Otherwise, sink it to the exit block.
-    Instruction *ToMove = I;
-    bool Done = false;
-
-    if (I != Preheader->begin()) {
-      // Skip debug info intrinsics.
-      do {
-        --I;
-      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
-
-      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
-        Done = true;
-    } else {
-      Done = true;
-    }
-
-    ToMove->moveBefore(InsertPt);
-    if (Done) break;
-    InsertPt = ToMove;
-  }
-}
-
-/// ConvertToSInt - Convert APF to an integer, if possible.
-static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
-  bool isExact = false;
-  if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
-    return false;
-  // See if we can convert this to an int64_t
-  uint64_t UIntVal;
-  if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
-                           &isExact) != APFloat::opOK || !isExact)
-    return false;
-  IntVal = UIntVal;
-  return true;
-}
-
-/// HandleFloatingPointIV - If the loop has floating induction variable
-/// then insert corresponding integer induction variable if possible.
-/// For example,
-/// for(double i = 0; i < 10000; ++i)
-///   bar(i)
-/// is converted into
-/// for(int i = 0; i < 10000; ++i)
-///   bar((double)i);
-///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
-  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
-  unsigned BackEdge     = IncomingEdge^1;
-
-  // Check incoming value.
-  ConstantFP *InitValueVal =
-    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
-
-  int64_t InitValue;
-  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
-    return;
-
-  // Check IV increment. Reject this PN if increment operation is not
-  // an add or increment value can not be represented by an integer.
-  BinaryOperator *Incr =
-    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
-  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
-
-  // If this is not an add of the PHI with a constantfp, or if the constant fp
-  // is not an integer, bail out.
-  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
-  int64_t IncValue;
-  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
-      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
-    return;
-
-  // Check Incr uses. One user is PN and the other user is an exit condition
-  // used by the conditional terminator.
-  Value::use_iterator IncrUse = Incr->use_begin();
-  Instruction *U1 = cast<Instruction>(*IncrUse++);
-  if (IncrUse == Incr->use_end()) return;
-  Instruction *U2 = cast<Instruction>(*IncrUse++);
-  if (IncrUse != Incr->use_end()) return;
-
-  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
-  // only used by a branch, we can't transform it.
-  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
-  if (!Compare)
-    Compare = dyn_cast<FCmpInst>(U2);
-  if (Compare == 0 || !Compare->hasOneUse() ||
-      !isa<BranchInst>(Compare->use_back()))
-    return;
-
-  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
-
-  // We need to verify that the branch actually controls the iteration count
-  // of the loop.  If not, the new IV can overflow and no one will notice.
-  // The branch block must be in the loop and one of the successors must be out
-  // of the loop.
-  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
-  if (!L->contains(TheBr->getParent()) ||
-      (L->contains(TheBr->getSuccessor(0)) &&
-       L->contains(TheBr->getSuccessor(1))))
-    return;
-
-
-  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
-  // transform it.
-  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
-  int64_t ExitValue;
-  if (ExitValueVal == 0 ||
-      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
-    return;
-
-  // Find new predicate for integer comparison.
-  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
-  switch (Compare->getPredicate()) {
-  default: return;  // Unknown comparison.
-  case CmpInst::FCMP_OEQ:
-  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
-  case CmpInst::FCMP_ONE:
-  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
-  case CmpInst::FCMP_OGT:
-  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
-  case CmpInst::FCMP_OGE:
-  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
-  case CmpInst::FCMP_OLT:
-  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
-  case CmpInst::FCMP_OLE:
-  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
-  }
-
-  // We convert the floating point induction variable to a signed i32 value if
-  // we can.  This is only safe if the comparison will not overflow in a way
-  // that won't be trapped by the integer equivalent operations.  Check for this
-  // now.
-  // TODO: We could use i64 if it is native and the range requires it.
-
-  // The start/stride/exit values must all fit in signed i32.
-  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
-    return;
-
-  // If not actually striding (add x, 0.0), avoid touching the code.
-  if (IncValue == 0)
-    return;
-
-  // Positive and negative strides have different safety conditions.
-  if (IncValue > 0) {
-    // If we have a positive stride, we require the init to be less than the
-    // exit value and an equality or less than comparison.
-    if (InitValue >= ExitValue ||
-        NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
-      return;
-
-    uint32_t Range = uint32_t(ExitValue-InitValue);
-    if (NewPred == CmpInst::ICMP_SLE) {
-      // Normalize SLE -> SLT, check for infinite loop.
-      if (++Range == 0) return;  // Range overflows.
-    }
-
-    unsigned Leftover = Range % uint32_t(IncValue);
-
-    // If this is an equality comparison, we require that the strided value
-    // exactly land on the exit value, otherwise the IV condition will wrap
-    // around and do things the fp IV wouldn't.
-    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
-        Leftover != 0)
-      return;
-
-    // If the stride would wrap around the i32 before exiting, we can't
-    // transform the IV.
-    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
-      return;
-
-  } else {
-    // If we have a negative stride, we require the init to be greater than the
-    // exit value and an equality or greater than comparison.
-    if (InitValue >= ExitValue ||
-        NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
-      return;
-
-    uint32_t Range = uint32_t(InitValue-ExitValue);
-    if (NewPred == CmpInst::ICMP_SGE) {
-      // Normalize SGE -> SGT, check for infinite loop.
-      if (++Range == 0) return;  // Range overflows.
-    }
-
-    unsigned Leftover = Range % uint32_t(-IncValue);
-
-    // If this is an equality comparison, we require that the strided value
-    // exactly land on the exit value, otherwise the IV condition will wrap
-    // around and do things the fp IV wouldn't.
-    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
-        Leftover != 0)
-      return;
-
-    // If the stride would wrap around the i32 before exiting, we can't
-    // transform the IV.
-    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
-      return;
-  }
-
-  const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
-
-  // Insert new integer induction variable.
-  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
-  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
-                      PN->getIncomingBlock(IncomingEdge));
-
-  Value *NewAdd =
-    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
-                              Incr->getName()+".int", Incr);
-  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
-
-  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
-                                      ConstantInt::get(Int32Ty, ExitValue),
-                                      Compare->getName());
-
-  // In the following deletions, PN may become dead and may be deleted.
-  // Use a WeakVH to observe whether this happens.
-  WeakVH WeakPH = PN;
-
-  // Delete the old floating point exit comparison.  The branch starts using the
-  // new comparison.
-  NewCompare->takeName(Compare);
-  Compare->replaceAllUsesWith(NewCompare);
-  RecursivelyDeleteTriviallyDeadInstructions(Compare);
-
-  // Delete the old floating point increment.
-  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
-  RecursivelyDeleteTriviallyDeadInstructions(Incr);
-
-  // If the FP induction variable still has uses, this is because something else
-  // in the loop uses its value.  In order to canonicalize the induction
-  // variable, we chose to eliminate the IV and rewrite it in terms of an
-  // int->fp cast.
-  //
-  // We give preference to sitofp over uitofp because it is faster on most
-  // platforms.
-  if (WeakPH) {
-    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
-                                 PN->getParent()->getFirstNonPHI());
-    PN->replaceAllUsesWith(Conv);
-    RecursivelyDeleteTriviallyDeadInstructions(PN);
-  }
-
-  // Add a new IVUsers entry for the newly-created integer PHI.
-  if (IU)
-    IU->AddUsersIfInteresting(NewPHI);
-}
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 13bd022..66add6c 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -178,7 +178,7 @@ INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
 Pass *llvm::createLICMPass() { return new LICM(); }
 
 /// Hoist expressions out of the specified loop. Note, alias info for inner
-/// loop is not preserved so it is not a good idea to run LICM multiple 
+/// loop is not preserved so it is not a good idea to run LICM multiple
 /// times on one loop.
 ///
 bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -199,13 +199,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
 
     // What if InnerLoop was modified by other passes ?
     CurAST->add(*InnerAST);
-    
+
     // Once we've incorporated the inner loop's AST into ours, we don't need the
     // subloop's anymore.
     delete InnerAST;
     LoopToAliasSetMap.erase(InnerL);
   }
-  
+
   CurLoop = L;
 
   // Get the preheader block to move instructions into...
@@ -245,7 +245,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
          I != E; ++I)
       PromoteAliasSet(*I);
   }
-  
+
   // Clear out loops state information for the next iteration
   CurLoop = 0;
   Preheader = 0;
@@ -283,7 +283,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
 
   for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
     Instruction &I = *--II;
-    
+
     // If the instruction is dead, we would try to sink it because it isn't used
     // in the loop, instead, just delete it.
     if (isInstructionTriviallyDead(&I)) {
@@ -336,7 +336,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
         I.eraseFromParent();
         continue;
       }
-      
+
       // Try hoisting the instruction out to the preheader.  We can only do this
       // if all of the operands of the instruction are loop invariant and if it
       // is safe to hoist the instruction.
@@ -364,7 +364,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
     // in the same alias set as something that ends up being modified.
     if (AA->pointsToConstantMemory(LI->getOperand(0)))
       return true;
-    
+
     // Don't hoist loads which have may-aliased stores in loop.
     uint64_t Size = 0;
     if (LI->getType()->isSized())
@@ -470,7 +470,7 @@ void LICM::sink(Instruction &I) {
     }
     return;
   }
-  
+
   if (ExitBlocks.empty()) {
     // The instruction is actually dead if there ARE NO exit blocks.
     CurAST->deleteValue(&I);
@@ -482,30 +482,30 @@ void LICM::sink(Instruction &I) {
     I.eraseFromParent();
     return;
   }
-  
+
   // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
   // hard work of inserting PHI nodes as necessary.
   SmallVector<PHINode*, 8> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
-  
+
   if (!I.use_empty())
     SSA.Initialize(I.getType(), I.getName());
-  
+
   // Insert a copy of the instruction in each exit block of the loop that is
   // dominated by the instruction.  Each exit block is known to only be in the
   // ExitBlocks list once.
   BasicBlock *InstOrigBB = I.getParent();
   unsigned NumInserted = 0;
-  
+
   for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
     BasicBlock *ExitBlock = ExitBlocks[i];
-    
+
     if (!DT->dominates(InstOrigBB, ExitBlock))
       continue;
-    
+
     // Insert the code after the last PHI node.
     BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-    
+
     // If this is the first exit block processed, just move the original
     // instruction, otherwise clone the original instruction and insert
     // the copy.
@@ -519,12 +519,12 @@ void LICM::sink(Instruction &I) {
         New->setName(I.getName()+".le");
       ExitBlock->getInstList().insert(InsertPt, New);
     }
-    
+
     // Now that we have inserted the instruction, inform SSAUpdater.
     if (!I.use_empty())
       SSA.AddAvailableValue(ExitBlock, New);
   }
-  
+
   // If the instruction doesn't dominate any exit blocks, it must be dead.
   if (NumInserted == 0) {
     CurAST->deleteValue(&I);
@@ -533,7 +533,7 @@ void LICM::sink(Instruction &I) {
     I.eraseFromParent();
     return;
   }
-  
+
   // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
   for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
     // Grab the use before incrementing the iterator.
@@ -542,12 +542,12 @@ void LICM::sink(Instruction &I) {
     ++UI;
     SSA.RewriteUseAfterInsertions(U);
   }
-  
+
   // Update CurAST for NewPHIs if I had pointer type.
   if (I.getType()->isPointerTy())
     for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
       CurAST->copyValue(&I, NewPHIs[i]);
-  
+
   // Finally, remove the instruction from CurAST.  It is no longer in the loop.
   CurAST->deleteValue(&I);
 }
@@ -606,15 +606,17 @@ namespace {
     SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
     AliasSetTracker &AST;
     DebugLoc DL;
+    int Alignment;
   public:
     LoopPromoter(Value *SP,
                  const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
                  SmallPtrSet<Value*, 4> &PMA,
                  SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
-                 DebugLoc dl)
-      : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP),
-        PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {}
-    
+                 DebugLoc dl, int alignment)
+      : LoadAndStorePromoter(Insts, S), SomePtr(SP),
+        PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
+        Alignment(alignment) {}
+
     virtual bool isInstInList(Instruction *I,
                               const SmallVectorImpl<Instruction*> &) const {
       Value *Ptr;
@@ -624,7 +626,7 @@ namespace {
         Ptr = cast<StoreInst>(I)->getPointerOperand();
       return PointerMustAliases.count(Ptr);
     }
-    
+
     virtual void doExtraRewritesBeforeFinalDeletion() const {
       // Insert stores after in the loop exit blocks.  Each exit block gets a
       // store of the live-out values that feed them.  Since we've already told
@@ -635,6 +637,7 @@ namespace {
         Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
         Instruction *InsertPos = ExitBlock->getFirstNonPHI();
         StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+        NewSI->setAlignment(Alignment);
         NewSI->setDebugLoc(DL);
       }
     }
@@ -661,7 +664,7 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
   if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
       AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
     return;
-  
+
   assert(!AS.empty() &&
          "Must alias set should have at least one pointer element in it!");
   Value *SomePtr = AS.begin()->getValue();
@@ -676,60 +679,78 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
   //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
   //
   // is not safe, because *P may only be valid to access if 'c' is true.
-  // 
+  //
   // It is safe to promote P if all uses are direct load/stores and if at
   // least one is guaranteed to be executed.
   bool GuaranteedToExecute = false;
-  
+
   SmallVector<Instruction*, 64> LoopUses;
   SmallPtrSet<Value*, 4> PointerMustAliases;
 
+  // We start with an alignment of one and try to find instructions that allow
+  // us to prove better alignment.
+  unsigned Alignment = 1;
+
   // Check that all of the pointers in the alias set have the same type.  We
   // cannot (yet) promote a memory location that is loaded and stored in
   // different sizes.
   for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
     Value *ASIV = ASI->getValue();
     PointerMustAliases.insert(ASIV);
-    
+
     // Check that all of the pointers in the alias set have the same type.  We
     // cannot (yet) promote a memory location that is loaded and stored in
     // different sizes.
     if (SomePtr->getType() != ASIV->getType())
       return;
-    
+
     for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
          UI != UE; ++UI) {
       // Ignore instructions that are outside the loop.
       Instruction *Use = dyn_cast<Instruction>(*UI);
       if (!Use || !CurLoop->contains(Use))
         continue;
-      
+
       // If there is an non-load/store instruction in the loop, we can't promote
       // it.
-      if (isa<LoadInst>(Use))
+      unsigned InstAlignment;
+      if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
         assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
-      else if (isa<StoreInst>(Use)) {
+        InstAlignment = load->getAlignment();
+      } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
         // Stores *of* the pointer are not interesting, only stores *to* the
         // pointer.
         if (Use->getOperand(1) != ASIV)
           continue;
+        InstAlignment = store->getAlignment();
         assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
       } else
         return; // Not a load or store.
-      
+
+      // If the alignment of this instruction allows us to specify a more
+      // restrictive (and performant) alignment and if we are sure this
+      // instruction will be executed, update the alignment.
+      // Larger is better, with the exception of 0 being the best alignment.
+      if ((InstAlignment > Alignment || InstAlignment == 0)
+          && (Alignment != 0))
+        if (isSafeToExecuteUnconditionally(*Use)) {
+          GuaranteedToExecute = true;
+          Alignment = InstAlignment;
+        }
+
       if (!GuaranteedToExecute)
         GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
-      
+
       LoopUses.push_back(Use);
     }
   }
-  
+
   // If there isn't a guaranteed-to-execute instruction, we can't promote.
   if (!GuaranteedToExecute)
     return;
-  
+
   // Otherwise, this is safe to promote, lets do it!
-  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');  
+  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
   Changed = true;
   ++NumPromoted;
 
@@ -741,18 +762,19 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
 
   SmallVector<BasicBlock*, 8> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
-  
+
   // We use the SSAUpdater interface to insert phi nodes as required.
   SmallVector<PHINode*, 16> NewPHIs;
   SSAUpdater SSA(&NewPHIs);
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
-                        *CurAST, DL);
-  
+                        *CurAST, DL, Alignment);
+
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.
   LoadInst *PreheaderLoad =
     new LoadInst(SomePtr, SomePtr->getName()+".promoted",
                  Preheader->getTerminator());
+  PreheaderLoad->setAlignment(Alignment);
   PreheaderLoad->setDebugLoc(DL);
   SSA.AddAvailableValue(Preheader, PreheaderLoad);
 
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a7bc0e0..a0e41d9 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -173,6 +173,11 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
 bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
   CurLoop = L;
 
+  // Disable loop idiom recognition if the function's name is a common idiom. 
+  StringRef Name = L->getHeader()->getParent()->getName();
+  if (Name == "memset" || Name == "memcpy")
+    return false;
+
   // The trip count of the loop must be analyzable.
   SE = &getAnalysis<ScalarEvolution>();
   if (!SE->hasLoopInvariantBackedgeTakenCount(L))
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index c6ca99a..509d026 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2767,7 +2767,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
         // value to the immediate would produce a value closer to zero than the
         // immediate itself, then the formula isn't worthwhile.
         if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
-          if (C->getValue()->getValue().isNegative() !=
+          if (C->getValue()->isNegative() !=
                 (NewF.AM.BaseOffs < 0) &&
               (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
                 .ule(abs64(NewF.AM.BaseOffs)))
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index bd4c2d6..7ed3db6 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -840,11 +840,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   
   // If not, then we know we can transform this.
   Module *Mod = M->getParent()->getParent()->getParent();
-  const Type *ArgTys[3] = { M->getRawDest()->getType(),
-                            M->getRawSource()->getType(),
-                            M->getLength()->getType() };
+  Type *ArgTys[3] = { M->getRawDest()->getType(),
+                      M->getRawSource()->getType(),
+                      M->getLength()->getType() };
   M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
-                                                 ArgTys, 3));
+                                                 ArgTys));
 
   // MemDep may have over conservative information about this instruction, just
   // conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 89a451e..ee132d3 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -1498,8 +1498,8 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
 Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
   if (!RetainRVCallee) {
     LLVMContext &C = M->getContext();
-    const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<const Type *> Params;
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
     Params.push_back(I8X);
     const FunctionType *FTy =
       FunctionType::get(I8X, Params, /*isVarArg=*/false);
@@ -1515,8 +1515,8 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
 Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
   if (!AutoreleaseRVCallee) {
     LLVMContext &C = M->getContext();
-    const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<const Type *> Params;
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
     Params.push_back(I8X);
     const FunctionType *FTy =
       FunctionType::get(I8X, Params, /*isVarArg=*/false);
@@ -1532,7 +1532,7 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
 Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
   if (!ReleaseCallee) {
     LLVMContext &C = M->getContext();
-    std::vector<const Type *> Params;
+    std::vector<Type *> Params;
     Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
     AttrListPtr Attributes;
     Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -1548,7 +1548,7 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
 Constant *ObjCARCOpt::getRetainCallee(Module *M) {
   if (!RetainCallee) {
     LLVMContext &C = M->getContext();
-    std::vector<const Type *> Params;
+    std::vector<Type *> Params;
     Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
     AttrListPtr Attributes;
     Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -1564,7 +1564,7 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
 Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
   if (!AutoreleaseCallee) {
     LLVMContext &C = M->getContext();
-    std::vector<const Type *> Params;
+    std::vector<Type *> Params;
     Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
     AttrListPtr Attributes;
     Attributes.addAttr(~0u, Attribute::NoUnwind);
@@ -3269,9 +3269,9 @@ void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
 Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
   if (!StoreStrongCallee) {
     LLVMContext &C = M->getContext();
-    const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    const Type *I8XX = PointerType::getUnqual(I8X);
-    std::vector<const Type *> Params;
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    Type *I8XX = PointerType::getUnqual(I8X);
+    std::vector<Type *> Params;
     Params.push_back(I8XX);
     Params.push_back(I8X);
 
@@ -3291,8 +3291,8 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
 Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
   if (!RetainAutoreleaseCallee) {
     LLVMContext &C = M->getContext();
-    const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<const Type *> Params;
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
     Params.push_back(I8X);
     const FunctionType *FTy =
       FunctionType::get(I8X, Params, /*isVarArg=*/false);
@@ -3307,8 +3307,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
 Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
   if (!RetainAutoreleaseRVCallee) {
     LLVMContext &C = M->getContext();
-    const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
-    std::vector<const Type *> Params;
+    Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+    std::vector<Type *> Params;
     Params.push_back(I8X);
     const FunctionType *FTy =
       FunctionType::get(I8X, Params, /*isVarArg=*/false);
@@ -3421,7 +3421,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
     Args[1] = new BitCastInst(Args[1], I8X, "", Store);
   CallInst *StoreStrong =
     CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
-                     Args, array_endof(Args), "", Store);
+                     Args, "", Store);
   StoreStrong->setDoesNotThrow();
   StoreStrong->setDebugLoc(Store->getDebugLoc());
 
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index c1dfe15..e6341ae 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -812,7 +812,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
       // because we can percolate the negate out.  Watch for minint, which
       // cannot be positivified.
       if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
-        if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) {
+        if (CI->isNegative() && !CI->isMinValue(true)) {
           Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
           assert(!Duplicates.count(Factor) &&
                  "Shouldn't have two constant factors, missed a canonicalize");
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 158d653..302c287 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeLoopUnswitchPass(Registry);
   initializeLoopIdiomRecognizePass(Registry);
   initializeLowerAtomicPass(Registry);
+  initializeLowerExpectIntrinsicPass(Registry);
   initializeMemCpyOptPass(Registry);
   initializeObjCARCAliasAnalysisPass(Registry);
   initializeObjCARCExpandPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 87e364d..7d6349c 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -30,6 +30,7 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/Loads.h"
@@ -1094,16 +1095,37 @@ bool SROA::runOnFunction(Function &F) {
 namespace {
 class AllocaPromoter : public LoadAndStorePromoter {
   AllocaInst *AI;
+  DIBuilder *DIB;
+  SmallVector<DbgDeclareInst *, 4> DDIs;
+  SmallVector<DbgValueInst *, 4> DVIs;
 public:
   AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
-                 DbgDeclareInst *DD, DIBuilder *&DB)
-    : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {}
+                 DIBuilder *DB)
+    : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
   
   void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
     // Remember which alloca we're promoting (for isInstInList).
     this->AI = AI;
+    if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
+      for (Value::use_iterator UI = DebugNode->use_begin(),
+             E = DebugNode->use_end(); UI != E; ++UI)
+        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+          DDIs.push_back(DDI);
+        else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+          DVIs.push_back(DVI);
+
     LoadAndStorePromoter::run(Insts);
     AI->eraseFromParent();
+    for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(), 
+           E = DDIs.end(); I != E; ++I) {
+      DbgDeclareInst *DDI = *I;
+      DDI->eraseFromParent();
+    }
+    for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(), 
+           E = DVIs.end(); I != E; ++I) {
+      DbgValueInst *DVI = *I;
+      DVI->eraseFromParent();
+    }
   }
   
   virtual bool isInstInList(Instruction *I,
@@ -1112,6 +1134,45 @@ public:
       return LI->getOperand(0) == AI;
     return cast<StoreInst>(I)->getPointerOperand() == AI;
   }
+
+  virtual void updateDebugInfo(Instruction *Inst) const {
+    for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(), 
+           E = DDIs.end(); I != E; ++I) {
+      DbgDeclareInst *DDI = *I;
+      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+        ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
+    }
+    for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), 
+           E = DVIs.end(); I != E; ++I) {
+      DbgValueInst *DVI = *I;
+      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        Instruction *DbgVal = NULL;
+        // If an argument is zero extended then use argument directly. The ZExt
+        // may be zapped by an optimization pass in future.
+        Argument *ExtendedArg = NULL;
+        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+          ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+          ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+        if (ExtendedArg)
+          DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0, 
+                                                DIVariable(DVI->getVariable()),
+                                                SI);
+        else
+          DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, 
+                                                DIVariable(DVI->getVariable()),
+                                                SI);
+        DbgVal->setDebugLoc(DVI->getDebugLoc());
+      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        Instruction *DbgVal = 
+          DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0, 
+                                       DIVariable(DVI->getVariable()), LI);
+        DbgVal->setDebugLoc(DVI->getDebugLoc());
+      }
+    }
+  }
 };
 } // end anon namespace
 
@@ -1381,10 +1442,9 @@ bool SROA::performPromotion(Function &F) {
     DT = &getAnalysis<DominatorTree>();
 
   BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
-
+  DIBuilder DIB(*F.getParent());
   bool Changed = false;
   SmallVector<Instruction*, 64> Insts;
-  DIBuilder *DIB = 0;
   while (1) {
     Allocas.clear();
 
@@ -1408,11 +1468,7 @@ bool SROA::performPromotion(Function &F) {
         for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
              UI != E; ++UI)
           Insts.push_back(cast<Instruction>(*UI));
-
-        DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
-        if (DDI && !DIB)
-          DIB = new DIBuilder(*AI->getParent()->getParent()->getParent());
-        AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts);
+        AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
         Insts.clear();
       }
     }
@@ -1420,10 +1476,6 @@ bool SROA::performPromotion(Function &F) {
     Changed = true;
   }
 
-  // FIXME: Is there a better way to handle the lazy initialization of DIB
-  // so that there doesn't need to be an explicit delete?
-  delete DIB;
-
   return Changed;
 }
 
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7e9cc80..a66b3e3 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -91,8 +91,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
 static void ChangeToCall(InvokeInst *II) {
   BasicBlock *BB = II->getParent();
   SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
-  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
-                                       Args.end(), "", II);
+  CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6247b03..7c415e5 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -992,9 +992,9 @@ struct FFSOpt : public LibCallOptimization {
     }
 
     // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
-    const Type *ArgType = Op->getType();
+    Type *ArgType = Op->getType();
     Value *F = Intrinsic::getDeclaration(Callee->getParent(),
-                                         Intrinsic::cttz, &ArgType, 1);
+                                         Intrinsic::cttz, ArgType);
     Value *V = B.CreateCall(F, Op, "cttz");
     V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
     V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 2df534f..204c2c6 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMTransformUtils
   Local.cpp
   LoopSimplify.cpp
   LoopUnroll.cpp
+  LowerExpectIntrinsic.cpp
   LowerInvoke.cpp
   LowerSwitch.cpp
   Mem2Reg.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 561b69d..6ea831f 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -140,7 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
 Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
                               bool ModuleLevelChanges,
                               ClonedCodeInfo *CodeInfo) {
-  std::vector<const Type*> ArgTypes;
+  std::vector<Type*> ArgTypes;
 
   // The user might be deleting arguments to the function by specifying them in
   // the VMap.  If so, we need to not add the arguments to the arg ty vector
@@ -342,18 +342,6 @@ ConstantFoldMappedInstruction(const Instruction *I) {
                                   Ops.size(), TD);
 }
 
-static DebugLoc
-UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL,
-                    LLVMContext &Ctx) {
-  DebugLoc NewLoc = TheCallDL;
-  if (MDNode *IA = InsnDL.getInlinedAt(Ctx))
-    NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL,
-                                 Ctx);
-
-  return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(),
-                       InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx));
-}
-
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
 /// except that it does some simple constant prop and DCE on the fly.  The
 /// effect of this is to copy significantly less code in cases where (for
@@ -418,50 +406,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     if (PHINode *PN = dyn_cast<PHINode>(I)) {
       // Skip over all PHI nodes, remembering them for later.
       BasicBlock::const_iterator OldI = BI->begin();
-      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
-        if (I->hasMetadata()) {
-          if (!TheCallDL.isUnknown()) {
-            DebugLoc IDL = I->getDebugLoc();
-            if (!IDL.isUnknown()) {
-              DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
-                                                   I->getContext());
-              I->setDebugLoc(NewDL);
-            }
-          } else {
-            // The cloned instruction has dbg info but the call instruction
-            // does not have dbg info. Remove dbg info from cloned instruction.
-            I->setDebugLoc(DebugLoc());
-          }
-        }
+      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
         PHIToResolve.push_back(cast<PHINode>(OldI));
-      }
     }
     
-    // FIXME:
-    // FIXME:
-    // FIXME: Unclone all this metadata stuff.
-    // FIXME:
-    // FIXME:
-    
     // Otherwise, remap the rest of the instructions normally.
-    for (; I != NewBB->end(); ++I) {
-      if (I->hasMetadata()) {
-        if (!TheCallDL.isUnknown()) {
-          DebugLoc IDL = I->getDebugLoc();
-          if (!IDL.isUnknown()) {
-            DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
-                                                 I->getContext());
-            I->setDebugLoc(NewDL);
-          }
-        } else {
-          // The cloned instruction has dbg info but the call instruction
-          // does not have dbg info. Remove dbg info from cloned instruction.
-          I->setDebugLoc(DebugLoc());
-        }
-      }
+    for (; I != NewBB->end(); ++I)
       RemapInstruction(I, VMap,
                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
-    }
   }
   
   // Defer PHI resolution until rest of function is resolved, PHI resolution
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 1046c38..a08fa35 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -15,7 +15,6 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Module.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/TypeSymbolTable.h"
 #include "llvm/Constant.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 using namespace llvm;
@@ -32,20 +31,13 @@ Module *llvm::CloneModule(const Module *M) {
   return CloneModule(M, VMap);
 }
 
-Module *llvm::CloneModule(const Module *M,
-                          ValueToValueMapTy &VMap) {
-  // First off, we need to create the new module...
+Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+  // First off, we need to create the new module.
   Module *New = new Module(M->getModuleIdentifier(), M->getContext());
   New->setDataLayout(M->getDataLayout());
   New->setTargetTriple(M->getTargetTriple());
   New->setModuleInlineAsm(M->getModuleInlineAsm());
-
-  // Copy all of the type symbol table entries over.
-  const TypeSymbolTable &TST = M->getTypeSymbolTable();
-  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); 
-       TI != TE; ++TI)
-    New->addTypeName(TI->first, TI->second);
-  
+   
   // Copy all of the dependent libraries over.
   for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
     New->addLibrary(*I);
@@ -88,8 +80,7 @@ Module *llvm::CloneModule(const Module *M,
        I != E; ++I) {
     GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
     if (I->hasInitializer())
-      GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
-                                                 VMap, RF_None)));
+      GV->setInitializer(MapValue(I->getInitializer(), VMap));
     GV->setLinkage(I->getLinkage());
     GV->setThreadLocal(I->isThreadLocal());
     GV->setConstant(I->isConstant());
@@ -119,8 +110,8 @@ Module *llvm::CloneModule(const Module *M,
        I != E; ++I) {
     GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
     GA->setLinkage(I->getLinkage());
-    if (const Constant* C = I->getAliasee())
-      GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
+    if (const Constant *C = I->getAliasee())
+      GA->setAliasee(MapValue(C, VMap));
   }
 
   // And named metadata....
@@ -129,8 +120,7 @@ Module *llvm::CloneModule(const Module *M,
     const NamedMDNode &NMD = *I;
     NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
     for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
-      NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
-                                               RF_None)));
+      NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
   }
 
   return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 8c133ea..0813523 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -258,7 +258,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
   default: RetTy = Type::getInt16Ty(header->getContext()); break;
   }
 
-  std::vector<const Type*> paramTy;
+  std::vector<Type*> paramTy;
 
   // Add the types of the input values to the function's argument list
   for (Values::const_iterator i = inputs.begin(),
@@ -279,7 +279,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
   }
 
   DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
-  for (std::vector<const Type*>::iterator i = paramTy.begin(),
+  for (std::vector<Type*>::iterator i = paramTy.begin(),
          e = paramTy.end(); i != e; ++i)
     DEBUG(dbgs() << **i << ", ");
   DEBUG(dbgs() << ")\n");
@@ -403,7 +403,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
 
   AllocaInst *Struct = 0;
   if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
-    std::vector<const Type*> ArgTypes;
+    std::vector<Type*> ArgTypes;
     for (Values::iterator v = StructValues.begin(),
            ve = StructValues.end(); v != ve; ++v)
       ArgTypes.push_back((*v)->getType());
@@ -429,7 +429,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
   }
 
   // Emit the call to the function
-  CallInst *call = CallInst::Create(newFunction, params.begin(), params.end(),
+  CallInst *call = CallInst::Create(newFunction, params,
                                     NumExitBlocks > 1 ? "targetBlock" : "");
   codeReplacer->getInstList().push_back(call);
 
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 18ecd61..d5b382e 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -450,9 +450,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
         NewSelector.push_back(Outer->getArgOperand(i));
 
       CallInst *NewInner =
-        IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(),
-                                      NewSelector.begin(),
-                                      NewSelector.end());
+        IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector);
       // No need to copy attributes, calling convention, etc.
       NewInner->takeName(Inner);
       Inner->replaceAllUsesWith(NewInner);
@@ -488,8 +486,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
     InvokeInst *II =
       InvokeInst::Create(CI->getCalledValue(), Split,
                          Invoke.getOuterUnwindDest(),
-                         InvokeArgs.begin(), InvokeArgs.end(),
-                         CI->getName(), BB);
+                         InvokeArgs, CI->getName(), BB);
     II->setCallingConv(CI->getCallingConv());
     II->setAttributes(CI->getAttributes());
     
@@ -663,7 +660,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
   
   LLVMContext &Context = Arg->getContext();
 
-  const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+  Type *VoidPtrTy = Type::getInt8PtrTy(Context);
   
   // Create the alloca.  If we have TargetData, use nice alignment.
   unsigned Align = 1;
@@ -680,10 +677,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
   Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), 
                                     &*Caller->begin()->begin());
   // Emit a memcpy.
-  const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+  Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
   Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                  Intrinsic::memcpy, 
-                                                 Tys, 3);
+                                                 Tys);
   Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
   Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
   
@@ -702,7 +699,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
     ConstantInt::get(Type::getInt32Ty(Context), 1),
     ConstantInt::getFalse(Context) // isVolatile
   };
-  IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs, CallArgs+5);
+  IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
   
   // Uses of the argument in the function should use our new alloca
   // instead.
@@ -744,6 +741,41 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
   return false;
 }
 
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively
+/// update InlinedAtEntry of a DebugLoc.
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL, 
+                                    const DebugLoc &InlinedAtDL,
+                                    LLVMContext &Ctx) {
+  if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+    DebugLoc NewInlinedAtDL 
+      = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+    return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+                         NewInlinedAtDL.getAsMDNode(Ctx));
+  }
+                                             
+  return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+                       InlinedAtDL.getAsMDNode(Ctx));
+}
+
+
+/// fixupLineNumbers - Update inlined instructions' line numbers to 
+/// to encode location where these instructions are inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+                              Instruction *TheCall) {
+  DebugLoc TheCallDL = TheCall->getDebugLoc();
+  if (TheCallDL.isUnknown())
+    return;
+
+  for (; FI != Fn->end(); ++FI) {
+    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+         BI != BE; ++BI) {
+      DebugLoc DL = BI->getDebugLoc();
+      if (!DL.isUnknown())
+        BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+    }
+  }
+}
+
 // InlineFunction - This function inlines the called function into the basic
 // block of the caller.  This returns false if it is not possible to inline this
 // call.  The program is still in a well defined state if this occurs though.
@@ -846,6 +878,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
     // Update the callgraph if requested.
     if (IFI.CG)
       UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+    // Update inlined instructions' line number information.
+    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
   }
 
   // If there are any alloca instructions in the block that used to be the entry
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 0000000..c1213fa
--- /dev/null
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,166 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic intructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+                   cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+                   cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+  class LowerExpectIntrinsic : public FunctionPass {
+
+    bool HandleSwitchExpect(SwitchInst *SI);
+
+    bool HandleIfExpect(BranchInst *BI);
+
+  public:
+    static char ID;
+    LowerExpectIntrinsic() : FunctionPass(ID) {
+      initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F);
+  };
+}
+
+
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+  CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+  if (!CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  Value *ArgValue = CI->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+
+  unsigned caseNo = SI->findCaseValue(ExpectedValue);
+  std::vector<Value *> Vec;
+  unsigned n = SI->getNumCases();
+  Vec.resize(n + 1); // +1 for MDString
+
+  Vec[0] = MDString::get(Context, "branch_weights");
+  for (unsigned i = 0; i < n; ++i) {
+    Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight);
+  }
+
+  MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
+  SI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+  SI->setCondition(ArgValue);
+  return true;
+}
+
+
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+  if (BI->isUnconditional())
+    return false;
+
+  // Handle non-optimized IR code like:
+  //   %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+  //   %tobool = icmp ne i64 %expval, 0
+  //   br i1 %tobool, label %if.then, label %if.end
+
+  ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+    return false;
+
+  CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+  if (!CI)
+    return false;
+
+  Function *Fn = CI->getCalledFunction();
+  if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+    return false;
+
+  Value *ArgValue = CI->getArgOperand(0);
+  ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  if (!ExpectedValue)
+    return false;
+
+  LLVMContext &Context = CI->getContext();
+  const Type *Int32Ty = Type::getInt32Ty(Context);
+  bool Likely = ExpectedValue->isOne();
+
+  // If expect value is equal to 1 it means that we are more likely to take
+  // branch 0, in other case more likely is branch 1.
+  Value *Ops[] = {
+    MDString::get(Context, "branch_weights"),
+    ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight),
+    ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight)
+  };
+
+  MDNode *WeightsNode = MDNode::get(Context, Ops);
+  BI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+  CmpI->setOperand(0, ArgValue);
+  return true;
+}
+
+
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+  for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+    BasicBlock *BB = I++;
+
+    // Create "block_weights" metadata.
+    if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+      if (HandleIfExpect(BI))
+        IfHandled++;
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+      if (HandleSwitchExpect(SI))
+        IfHandled++;
+    }
+
+    // remove llvm.expect intrinsics.
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+         BI != BE; ) {
+      CallInst *CI = dyn_cast<CallInst>(BI++);
+      if (!CI)
+        continue;
+
+      Function *Fn = CI->getCalledFunction();
+      if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+        Value *Exp = CI->getArgOperand(0);
+        CI->replaceAllUsesWith(Exp);
+        CI->eraseFromParent();
+      }
+    }
+  }
+
+  return false;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+                "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+  return new LowerExpectIntrinsic();
+}
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 025ae0d..f77d19d 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -66,7 +66,7 @@ namespace {
     Constant *AbortFn;
 
     // Used for expensive EH support.
-    const Type *JBLinkTy;
+    StructType *JBLinkTy;
     GlobalVariable *JBListHead;
     Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
     bool useExpensiveEHSupport;
@@ -120,24 +120,16 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
 // doInitialization - Make sure that there is a prototype for abort in the
 // current module.
 bool LowerInvoke::doInitialization(Module &M) {
-  const Type *VoidPtrTy =
-          Type::getInt8PtrTy(M.getContext());
+  const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
   if (useExpensiveEHSupport) {
     // Insert a type for the linked list of jump buffers.
     unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
     JBSize = JBSize ? JBSize : 200;
-    const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
-
-    { // The type is recursive, so use a type holder.
-      std::vector<const Type*> Elements;
-      Elements.push_back(JmpBufTy);
-      OpaqueType *OT = OpaqueType::get(M.getContext());
-      Elements.push_back(PointerType::getUnqual(OT));
-      PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
-      OT->refineAbstractTypeTo(JBLType.get());  // Complete the cycle.
-      JBLinkTy = JBLType.get();
-      M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
-    }
+    Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+    JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+    Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+    JBLinkTy->setBody(Elts);
 
     const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
 
@@ -184,8 +176,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
       SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
       // Insert a normal call instruction...
       CallInst *NewCall = CallInst::Create(II->getCalledValue(),
-                                           CallArgs.begin(), CallArgs.end(),
-                                           "",II);
+                                           CallArgs, "", II);
       NewCall->takeName(II);
       NewCall->setCallingConv(II->getCallingConv());
       NewCall->setAttributes(II->getAttributes());
@@ -265,8 +256,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
   // Insert a normal call instruction.
   SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
   CallInst *NewCall = CallInst::Create(II->getCalledValue(),
-                                       CallArgs.begin(), CallArgs.end(), "",
-                                       II);
+                                       CallArgs, "", II);
   NewCall->takeName(II);
   NewCall->setCallingConv(II->getCallingConv());
   NewCall->setAttributes(II->getAttributes());
@@ -573,7 +563,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
              Type::getInt8PtrTy(F.getContext()),
                            "tmp", UnwindBlock);
   Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
-  CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock);
+  CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
   new UnreachableInst(F.getContext(), UnwindBlock);
 
   // Set up the term block ("throw without a catch").
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b336194..b47a7cc 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -16,7 +16,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/DIBuilder.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
@@ -358,8 +357,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
 
 LoadAndStorePromoter::
 LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
-                     SSAUpdater &S, DbgDeclareInst *DD, DIBuilder *DB,
-                     StringRef BaseName) : SSA(S), DDI(DD), DIB(DB) {
+                     SSAUpdater &S, StringRef BaseName) : SSA(S) {
   if (Insts.empty()) return;
   
   Value *SomeVal;
@@ -407,8 +405,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
     if (BlockUses.size() == 1) {
       // If it is a store, it is a trivial def of the value in the block.
       if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
-        if (DDI)
-          ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+        updateDebugInfo(SI);
         SSA.AddAvailableValue(BB, SI->getOperand(0));
       } else 
         // Otherwise it is a load, queue it to rewrite as a live-in load.
@@ -462,9 +459,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
       if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
         // If this is a store to an unrelated pointer, ignore it.
         if (!isInstInList(SI, Insts)) continue;
-
-        if (DDI)
-          ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+        updateDebugInfo(SI);
 
         // Remember that this is the active value in the block.
         StoredValue = SI->getOperand(0);
@@ -522,7 +517,4 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
     instructionDeleted(User);
     User->eraseFromParent();
   }
-
-  if (DDI)
-    DDI->eraseFromParent();
 }
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 49726d5..9d9c324 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2211,8 +2211,7 @@ bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
       SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
       Builder.SetInsertPoint(BI);
       CallInst *CI = Builder.CreateCall(II->getCalledValue(),
-                                        Args.begin(), Args.end(),
-                                        II->getName());
+                                        Args, II->getName());
       CI->setCallingConv(II->getCallingConv());
       CI->setAttributes(II->getAttributes());
       // If the invoke produced a value, the Call now does instead.
@@ -2355,8 +2354,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
         SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
         Builder.SetInsertPoint(BI);
         CallInst *CI = Builder.CreateCall(II->getCalledValue(),
-                                          Args.begin(), Args.end(),
-                                          II->getName());
+                                          Args, II->getName());
         CI->setCallingConv(II->getCallingConv());
         CI->setAttributes(II->getAttributes());
         // If the invoke produced a value, the call does now instead.
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index de6cbdc..973b105 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,16 +13,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Type.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/Metadata.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
-                      RemapFlags Flags) {
+// Out of line method to get vtable etc for class.
+void ValueMapTypeRemapper::Anchor() {}
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+                      ValueMapTypeRemapper *TypeMapper) {
   ValueToValueMapTy::iterator I = VM.find(V);
   
   // If the value already exists in the map, use it.
@@ -30,8 +32,23 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
   
   // Global values do not need to be seeded into the VM if they
   // are using the identity mapping.
-  if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V))
+  if (isa<GlobalValue>(V) || isa<MDString>(V))
     return VM[V] = const_cast<Value*>(V);
+  
+  if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+    // Inline asm may need *type* remapping.
+    FunctionType *NewTy = IA->getFunctionType();
+    if (TypeMapper) {
+      NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+      if (NewTy != IA->getFunctionType())
+        V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+                           IA->hasSideEffects(), IA->isAlignStack());
+    }
+    
+    return VM[V] = const_cast<Value*>(V);
+  }
+  
 
   if (const MDNode *MD = dyn_cast<MDNode>(V)) {
     // If this is a module-level metadata and we know that nothing at the module
@@ -46,14 +63,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
     // Check all operands to see if any need to be remapped.
     for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
       Value *OP = MD->getOperand(i);
-      if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue;
+      if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
 
       // Ok, at least one operand needs remapping.  
       SmallVector<Value*, 4> Elts;
       Elts.reserve(MD->getNumOperands());
       for (i = 0; i != e; ++i) {
         Value *Op = MD->getOperand(i);
-        Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
+        Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
       }
       MDNode *NewMD = MDNode::get(V->getContext(), Elts);
       Dummy->replaceAllUsesWith(NewMD);
@@ -76,51 +93,75 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
     return 0;
   
   if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
-    Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+    Function *F = 
+      cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
     BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
-                                                       Flags));
+                                                       Flags, TypeMapper));
     return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
   }
   
-  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
-    Value *Op = C->getOperand(i);
-    Value *Mapped = MapValue(Op, VM, Flags);
-    if (Mapped == C) continue;
-    
-    // Okay, the operands don't all match.  We've already processed some or all
-    // of the operands, set them up now.
-    std::vector<Constant*> Ops;
-    Ops.reserve(C->getNumOperands());
-    for (unsigned j = 0; j != i; ++j)
-      Ops.push_back(cast<Constant>(C->getOperand(i)));
+  // Otherwise, we have some other constant to remap.  Start by checking to see
+  // if all operands have an identity remapping.
+  unsigned OpNo = 0, NumOperands = C->getNumOperands();
+  Value *Mapped = 0;
+  for (; OpNo != NumOperands; ++OpNo) {
+    Value *Op = C->getOperand(OpNo);
+    Mapped = MapValue(Op, VM, Flags, TypeMapper);
+    if (Mapped != C) break;
+  }
+  
+  // See if the type mapper wants to remap the type as well.
+  Type *NewTy = C->getType();
+  if (TypeMapper)
+    NewTy = TypeMapper->remapType(NewTy);
+
+  // If the result type and all operands match up, then just insert an identity
+  // mapping.
+  if (OpNo == NumOperands && NewTy == C->getType())
+    return VM[V] = C;
+  
+  // Okay, we need to create a new constant.  We've already processed some or
+  // all of the operands, set them all up now.
+  SmallVector<Constant*, 8> Ops;
+  Ops.reserve(NumOperands);
+  for (unsigned j = 0; j != OpNo; ++j)
+    Ops.push_back(cast<Constant>(C->getOperand(j)));
+  
+  // If one of the operands mismatch, push it and the other mapped operands.
+  if (OpNo != NumOperands) {
     Ops.push_back(cast<Constant>(Mapped));
-    
+  
     // Map the rest of the operands that aren't processed yet.
-    for (++i; i != e; ++i)
-      Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags)));
-    
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
-      return VM[V] = CE->getWithOperands(Ops);
-    if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
-      return VM[V] = ConstantArray::get(CA->getType(), Ops);
-    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
-      return VM[V] = ConstantStruct::get(CS->getType(), Ops);
-    assert(isa<ConstantVector>(C) && "Unknown mapped constant type");
-    return VM[V] = ConstantVector::get(Ops);
+    for (++OpNo; OpNo != NumOperands; ++OpNo)
+      Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
+                             Flags, TypeMapper));
   }
-
-  // If we reach here, all of the operands of the constant match.
-  return VM[V] = C;
+  
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    return VM[V] = CE->getWithOperands(Ops, NewTy);
+  if (isa<ConstantArray>(C))
+    return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+  if (isa<ConstantStruct>(C))
+    return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+  if (isa<ConstantVector>(C))
+    return VM[V] = ConstantVector::get(Ops);
+  // If this is a no-operand constant, it must be because the type was remapped.
+  if (isa<UndefValue>(C))
+    return VM[V] = UndefValue::get(NewTy);
+  if (isa<ConstantAggregateZero>(C))
+    return VM[V] = ConstantAggregateZero::get(NewTy);
+  assert(isa<ConstantPointerNull>(C));
+  return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
 }
 
 /// RemapInstruction - Convert the instruction operands from referencing the
 /// current values into those specified by VMap.
 ///
 void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
-                            RemapFlags Flags) {
+                            RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
   // Remap operands.
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, VMap, Flags);
+    Value *V = MapValue(*op, VMap, Flags, TypeMapper);
     // If we aren't ignoring missing entries, assert that something happened.
     if (V != 0)
       *op = V;
@@ -142,14 +183,19 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
     }
   }
 
-  // Remap attached metadata.
+  // Remap attached metadata.  Don't bother remapping DebugLoc, it can never
+  // have mappings to do.
   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
-  I->getAllMetadata(MDs);
+  I->getAllMetadataOtherThanDebugLoc(MDs);
   for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
        MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
-    Value *Old = MI->second;
-    Value *New = MapValue(Old, VMap, Flags);
+    MDNode *Old = MI->second;
+    MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
     if (New != Old)
-      I->setMetadata(MI->first, cast<MDNode>(New));
+      I->setMetadata(MI->first, New);
   }
+  
+  // If the instruction's type is being remapped, do so now.
+  if (TypeMapper)
+    I->mutateType(TypeMapper->remapType(I->getType()));
 }